diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..7fe8a795c536b259b887a62f7554dd7c2005edfc
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..901b023557ddecf4cbf7f59443cb48582aba25a1 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mlmodelc/* filter=lfs diff=lfs merge=lfs -text
diff --git a/base.en/.DS_Store b/base.en/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..2a4d5b10e7293a2c57aef21bfe774209a226db00
Binary files /dev/null and b/base.en/.DS_Store differ
diff --git a/base.en/ggml-base.en-encoder.mlmodelc/analytics/coremldata.bin b/base.en/ggml-base.en-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d0a2a431499e74b9ebb3e9c187d6a8e30ed99faa
--- /dev/null
+++ b/base.en/ggml-base.en-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bac2e6295226f3bec590131e715797af43a72773913adb05f1da7c302794858
+size 243
diff --git a/base.en/ggml-base.en-encoder.mlmodelc/coremldata.bin b/base.en/ggml-base.en-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..208f1486e03a00183268dd0c67064971d3651772
--- /dev/null
+++ b/base.en/ggml-base.en-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dadf937987b5a4db9fa964ba6ddb17c4085b581cf394b8930c13139b79db093
+size 392
diff --git a/base.en/ggml-base.en-encoder.mlmodelc/metadata.json b/base.en/ggml-base.en-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..e16fecbb963abbb1e5074028019d5caf260f4475
--- /dev/null
+++ b/base.en/ggml-base.en-encoder.mlmodelc/metadata.json
@@ -0,0 +1,72 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 512]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 6,
+      "Gelu" : 8,
+      "LayerNorm" : 13,
+      "Transpose" : 7,
+      "Softmax" : 48,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 13,
+      "Einsum" : 96,
+      "ExpandDims" : 1,
+      "Split" : 18,
+      "Conv" : 38
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2025-12-07",
+      "com.github.apple.coremltools.source" : "torch==2.10.0.dev20251207",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_base_en",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/base.en/ggml-base.en-encoder.mlmodelc/model.mil b/base.en/ggml-base.en-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..9809ec63335e5dd4729ed04b904d3020710081e3
--- /dev/null
+++ b/base.en/ggml-base.en-encoder.mlmodelc/model.mil
@@ -0,0 +1,733 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.10.0.dev20251207"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_32_pad_type_0 = const()[name = tensor<string, []>("op_32_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_32_pad_0 = const()[name = tensor<string, []>("op_32_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_32_strides_0 = const()[name = tensor<string, []>("op_32_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_32_dilations_0 = const()[name = tensor<string, []>("op_32_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_32_groups_0 = const()[name = tensor<string, []>("op_32_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [512, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [512, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [512]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245888)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_27")];
+            tensor<fp16, [1, 512, 3000]> var_32_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_32_dilations_0, groups = var_32_groups_0, pad = var_32_pad_0, pad_type = var_32_pad_type_0, strides = var_32_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_32_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 512, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_32_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_50_pad_type_0 = const()[name = tensor<string, []>("op_50_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_50_pad_0 = const()[name = tensor<string, []>("op_50_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_50_strides_0 = const()[name = tensor<string, []>("op_50_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_50_dilations_0 = const()[name = tensor<string, []>("op_50_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_50_groups_0 = const()[name = tensor<string, []>("op_50_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [512, 512, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(246976)))];
+            tensor<fp16, [512]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1819904)))];
+            tensor<fp16, [1, 512, 1500]> var_50_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_50_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 512, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_50_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [512, 1500]> var_55_to_fp16 = const()[name = tensor<string, []>("op_55_to_fp16"), val = tensor<fp16, [512, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1820992)))];
+            tensor<fp16, [1, 512, 1500]> var_57_cast_fp16 = add(x = x_3_cast_fp16, y = var_55_to_fp16)[name = tensor<string, []>("op_57_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_57_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_72 = const()[name = tensor<string, []>("op_72"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3357056)))];
+            tensor<fp16, [512]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3358144)))];
+            tensor<fp16, []> var_88_to_fp16 = const()[name = tensor<string, []>("op_88_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_88_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_123_weight_0_to_fp16 = const()[name = tensor<string, []>("op_123_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3359232)))];
+            tensor<fp16, [512]> var_123_bias_0_to_fp16 = const()[name = tensor<string, []>("op_123_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3883584)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_123_cast_fp16 = conv(bias = var_123_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_123_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3884672)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_121_pad_type_0 = const()[name = tensor<string, []>("op_121_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_121_strides_0 = const()[name = tensor<string, []>("op_121_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_121_pad_0 = const()[name = tensor<string, []>("op_121_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_121_dilations_0 = const()[name = tensor<string, []>("op_121_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_121_groups_0 = const()[name = tensor<string, []>("op_121_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4409024)))];
+            tensor<fp16, [512]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4933376)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_121_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_121_dilations_0, groups = var_121_groups_0, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_121_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_121_cast_fp16")];
+            tensor<int32, [8]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_124_axis_0 = const()[name = tensor<string, []>("op_124_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_7 = split(axis = var_124_axis_0, split_sizes = tile_0, x = var_123_cast_fp16)[name = tensor<string, []>("op_124_cast_fp16")];
+            tensor<int32, [4]> var_133_perm_0 = const()[name = tensor<string, []>("op_133_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_134_axis_0 = const()[name = tensor<string, []>("op_134_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_133_cast_fp16 = transpose(perm = var_133_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_7 = split(axis = var_134_axis_0, split_sizes = tile_1, x = var_133_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
+            tensor<int32, [8]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_143_axis_0 = const()[name = tensor<string, []>("op_143_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_7 = split(axis = var_143_axis_0, split_sizes = tile_2, x = var_121_cast_fp16)[name = tensor<string, []>("op_143_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_134_cast_fp16_0, var_124_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_134_cast_fp16_1, var_124_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_134_cast_fp16_2, var_124_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_134_cast_fp16_3, var_124_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_134_cast_fp16_4, var_124_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_134_cast_fp16_5, var_124_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_134_cast_fp16_6, var_124_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_134_cast_fp16_7, var_124_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_168_cast_fp16 = softmax(axis = var_72, x = aw_1_cast_fp16)[name = tensor<string, []>("op_168_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_169_cast_fp16 = softmax(axis = var_72, x = aw_3_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_170_cast_fp16 = softmax(axis = var_72, x = aw_5_cast_fp16)[name = tensor<string, []>("op_170_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_171_cast_fp16 = softmax(axis = var_72, x = aw_7_cast_fp16)[name = tensor<string, []>("op_171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_172_cast_fp16 = softmax(axis = var_72, x = aw_9_cast_fp16)[name = tensor<string, []>("op_172_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_173_cast_fp16 = softmax(axis = var_72, x = aw_11_cast_fp16)[name = tensor<string, []>("op_173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_174_cast_fp16 = softmax(axis = var_72, x = aw_13_cast_fp16)[name = tensor<string, []>("op_174_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_175_cast_fp16 = softmax(axis = var_72, x = aw_15_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<string, []> var_177_equation_0 = const()[name = tensor<string, []>("op_177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_177_cast_fp16 = einsum(equation = var_177_equation_0, values = (var_143_cast_fp16_0, var_168_cast_fp16))[name = tensor<string, []>("op_177_cast_fp16")];
+            tensor<string, []> var_179_equation_0 = const()[name = tensor<string, []>("op_179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_179_cast_fp16 = einsum(equation = var_179_equation_0, values = (var_143_cast_fp16_1, var_169_cast_fp16))[name = tensor<string, []>("op_179_cast_fp16")];
+            tensor<string, []> var_181_equation_0 = const()[name = tensor<string, []>("op_181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_181_cast_fp16 = einsum(equation = var_181_equation_0, values = (var_143_cast_fp16_2, var_170_cast_fp16))[name = tensor<string, []>("op_181_cast_fp16")];
+            tensor<string, []> var_183_equation_0 = const()[name = tensor<string, []>("op_183_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_183_cast_fp16 = einsum(equation = var_183_equation_0, values = (var_143_cast_fp16_3, var_171_cast_fp16))[name = tensor<string, []>("op_183_cast_fp16")];
+            tensor<string, []> var_185_equation_0 = const()[name = tensor<string, []>("op_185_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_185_cast_fp16 = einsum(equation = var_185_equation_0, values = (var_143_cast_fp16_4, var_172_cast_fp16))[name = tensor<string, []>("op_185_cast_fp16")];
+            tensor<string, []> var_187_equation_0 = const()[name = tensor<string, []>("op_187_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_187_cast_fp16 = einsum(equation = var_187_equation_0, values = (var_143_cast_fp16_5, var_173_cast_fp16))[name = tensor<string, []>("op_187_cast_fp16")];
+            tensor<string, []> var_189_equation_0 = const()[name = tensor<string, []>("op_189_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_189_cast_fp16 = einsum(equation = var_189_equation_0, values = (var_143_cast_fp16_6, var_174_cast_fp16))[name = tensor<string, []>("op_189_cast_fp16")];
+            tensor<string, []> var_191_equation_0 = const()[name = tensor<string, []>("op_191_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_191_cast_fp16 = einsum(equation = var_191_equation_0, values = (var_143_cast_fp16_7, var_175_cast_fp16))[name = tensor<string, []>("op_191_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_5_cast_fp16 = concat(axis = var_72, interleave = input_5_interleave_0, values = (var_177_cast_fp16, var_179_cast_fp16, var_181_cast_fp16, var_183_cast_fp16, var_185_cast_fp16, var_187_cast_fp16, var_189_cast_fp16, var_191_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_200_pad_type_0 = const()[name = tensor<string, []>("op_200_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_200_strides_0 = const()[name = tensor<string, []>("op_200_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_200_pad_0 = const()[name = tensor<string, []>("op_200_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_200_dilations_0 = const()[name = tensor<string, []>("op_200_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_200_groups_0 = const()[name = tensor<string, []>("op_200_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4934464)))];
+            tensor<fp16, [512]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5458816)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_200_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_200_dilations_0, groups = var_200_groups_0, pad = var_200_pad_0, pad_type = var_200_pad_type_0, strides = var_200_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_200_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5459904)))];
+            tensor<fp16, [512]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5460992)))];
+            tensor<fp16, []> var_210_to_fp16 = const()[name = tensor<string, []>("op_210_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_210_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5462080)))];
+            tensor<fp16, [2048]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7559296)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_236_pad_type_0 = const()[name = tensor<string, []>("op_236_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_236_strides_0 = const()[name = tensor<string, []>("op_236_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_236_pad_0 = const()[name = tensor<string, []>("op_236_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_236_dilations_0 = const()[name = tensor<string, []>("op_236_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_236_groups_0 = const()[name = tensor<string, []>("op_236_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7563456)))];
+            tensor<fp16, [512]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9660672)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_236_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_236_dilations_0, groups = var_236_groups_0, pad = var_236_pad_0, pad_type = var_236_pad_type_0, strides = var_236_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_236_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_236_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_245 = const()[name = tensor<string, []>("op_245"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9661760)))];
+            tensor<fp16, [512]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9662848)))];
+            tensor<fp16, []> var_261_to_fp16 = const()[name = tensor<string, []>("op_261_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_261_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_296_weight_0_to_fp16 = const()[name = tensor<string, []>("op_296_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9663936)))];
+            tensor<fp16, [512]> var_296_bias_0_to_fp16 = const()[name = tensor<string, []>("op_296_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10188288)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_296_cast_fp16 = conv(bias = var_296_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_296_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_296_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10189376)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_294_pad_type_0 = const()[name = tensor<string, []>("op_294_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_294_strides_0 = const()[name = tensor<string, []>("op_294_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_294_pad_0 = const()[name = tensor<string, []>("op_294_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_294_dilations_0 = const()[name = tensor<string, []>("op_294_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_294_groups_0 = const()[name = tensor<string, []>("op_294_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10713728)))];
+            tensor<fp16, [512]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11238080)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_294_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_294_dilations_0, groups = var_294_groups_0, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_294_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<int32, [8]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_297_axis_0 = const()[name = tensor<string, []>("op_297_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_7 = split(axis = var_297_axis_0, split_sizes = tile_3, x = var_296_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<int32, [4]> var_306_perm_0 = const()[name = tensor<string, []>("op_306_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_307_axis_0 = const()[name = tensor<string, []>("op_307_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_306_cast_fp16 = transpose(perm = var_306_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_7 = split(axis = var_307_axis_0, split_sizes = tile_4, x = var_306_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<int32, [8]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_316_axis_0 = const()[name = tensor<string, []>("op_316_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_7 = split(axis = var_316_axis_0, split_sizes = tile_5, x = var_294_cast_fp16)[name = tensor<string, []>("op_316_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_307_cast_fp16_0, var_297_cast_fp16_0))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_307_cast_fp16_1, var_297_cast_fp16_1))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_307_cast_fp16_2, var_297_cast_fp16_2))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_307_cast_fp16_3, var_297_cast_fp16_3))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_307_cast_fp16_4, var_297_cast_fp16_4))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_307_cast_fp16_5, var_297_cast_fp16_5))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_307_cast_fp16_6, var_297_cast_fp16_6))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_307_cast_fp16_7, var_297_cast_fp16_7))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_341_cast_fp16 = softmax(axis = var_245, x = aw_17_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_342_cast_fp16 = softmax(axis = var_245, x = aw_19_cast_fp16)[name = tensor<string, []>("op_342_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_343_cast_fp16 = softmax(axis = var_245, x = aw_21_cast_fp16)[name = tensor<string, []>("op_343_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_344_cast_fp16 = softmax(axis = var_245, x = aw_23_cast_fp16)[name = tensor<string, []>("op_344_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_345_cast_fp16 = softmax(axis = var_245, x = aw_25_cast_fp16)[name = tensor<string, []>("op_345_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_346_cast_fp16 = softmax(axis = var_245, x = aw_27_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_347_cast_fp16 = softmax(axis = var_245, x = aw_29_cast_fp16)[name = tensor<string, []>("op_347_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_348_cast_fp16 = softmax(axis = var_245, x = aw_31_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<string, []> var_350_equation_0 = const()[name = tensor<string, []>("op_350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_350_cast_fp16 = einsum(equation = var_350_equation_0, values = (var_316_cast_fp16_0, var_341_cast_fp16))[name = tensor<string, []>("op_350_cast_fp16")];
+            tensor<string, []> var_352_equation_0 = const()[name = tensor<string, []>("op_352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_352_cast_fp16 = einsum(equation = var_352_equation_0, values = (var_316_cast_fp16_1, var_342_cast_fp16))[name = tensor<string, []>("op_352_cast_fp16")];
+            tensor<string, []> var_354_equation_0 = const()[name = tensor<string, []>("op_354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_354_cast_fp16 = einsum(equation = var_354_equation_0, values = (var_316_cast_fp16_2, var_343_cast_fp16))[name = tensor<string, []>("op_354_cast_fp16")];
+            tensor<string, []> var_356_equation_0 = const()[name = tensor<string, []>("op_356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_356_cast_fp16 = einsum(equation = var_356_equation_0, values = (var_316_cast_fp16_3, var_344_cast_fp16))[name = tensor<string, []>("op_356_cast_fp16")];
+            tensor<string, []> var_358_equation_0 = const()[name = tensor<string, []>("op_358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_358_cast_fp16 = einsum(equation = var_358_equation_0, values = (var_316_cast_fp16_4, var_345_cast_fp16))[name = tensor<string, []>("op_358_cast_fp16")];
+            tensor<string, []> var_360_equation_0 = const()[name = tensor<string, []>("op_360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_360_cast_fp16 = einsum(equation = var_360_equation_0, values = (var_316_cast_fp16_5, var_346_cast_fp16))[name = tensor<string, []>("op_360_cast_fp16")];
+            tensor<string, []> var_362_equation_0 = const()[name = tensor<string, []>("op_362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_362_cast_fp16 = einsum(equation = var_362_equation_0, values = (var_316_cast_fp16_6, var_347_cast_fp16))[name = tensor<string, []>("op_362_cast_fp16")];
+            tensor<string, []> var_364_equation_0 = const()[name = tensor<string, []>("op_364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_364_cast_fp16 = einsum(equation = var_364_equation_0, values = (var_316_cast_fp16_7, var_348_cast_fp16))[name = tensor<string, []>("op_364_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_15_cast_fp16 = concat(axis = var_245, interleave = input_15_interleave_0, values = (var_350_cast_fp16, var_352_cast_fp16, var_354_cast_fp16, var_356_cast_fp16, var_358_cast_fp16, var_360_cast_fp16, var_362_cast_fp16, var_364_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_373_pad_type_0 = const()[name = tensor<string, []>("op_373_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = tensor<string, []>("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = tensor<string, []>("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = tensor<string, []>("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_373_groups_0 = const()[name = tensor<string, []>("op_373_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11239168)))];
+            tensor<fp16, [512]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11763520)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_373_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_373_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11764608)))];
+            tensor<fp16, [512]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11765696)))];
+            tensor<fp16, []> var_383_to_fp16 = const()[name = tensor<string, []>("op_383_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_383_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11766784)))];
+            tensor<fp16, [2048]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13864000)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_409_pad_type_0 = const()[name = tensor<string, []>("op_409_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_409_strides_0 = const()[name = tensor<string, []>("op_409_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_409_pad_0 = const()[name = tensor<string, []>("op_409_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_409_dilations_0 = const()[name = tensor<string, []>("op_409_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_409_groups_0 = const()[name = tensor<string, []>("op_409_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13868160)))];
+            tensor<fp16, [512]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15965376)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_409_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_409_dilations_0, groups = var_409_groups_0, pad = var_409_pad_0, pad_type = var_409_pad_type_0, strides = var_409_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_409_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_418 = const()[name = tensor<string, []>("op_418"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15966464)))];
+            tensor<fp16, [512]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15967552)))];
+            tensor<fp16, []> var_434_to_fp16 = const()[name = tensor<string, []>("op_434_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_434_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_469_weight_0_to_fp16 = const()[name = tensor<string, []>("op_469_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15968640)))];
+            tensor<fp16, [512]> var_469_bias_0_to_fp16 = const()[name = tensor<string, []>("op_469_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16492992)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_469_cast_fp16 = conv(bias = var_469_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_469_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16494080)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_467_pad_type_0 = const()[name = tensor<string, []>("op_467_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_467_strides_0 = const()[name = tensor<string, []>("op_467_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_467_pad_0 = const()[name = tensor<string, []>("op_467_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_467_dilations_0 = const()[name = tensor<string, []>("op_467_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_467_groups_0 = const()[name = tensor<string, []>("op_467_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17018432)))];
+            tensor<fp16, [512]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17542784)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_467_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_467_dilations_0, groups = var_467_groups_0, pad = var_467_pad_0, pad_type = var_467_pad_type_0, strides = var_467_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
+            tensor<int32, [8]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_470_axis_0 = const()[name = tensor<string, []>("op_470_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_7 = split(axis = var_470_axis_0, split_sizes = tile_6, x = var_469_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
+            tensor<int32, [4]> var_479_perm_0 = const()[name = tensor<string, []>("op_479_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_480_axis_0 = const()[name = tensor<string, []>("op_480_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_479_cast_fp16 = transpose(perm = var_479_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_7 = split(axis = var_480_axis_0, split_sizes = tile_7, x = var_479_cast_fp16)[name = tensor<string, []>("op_480_cast_fp16")];
+            tensor<int32, [8]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_489_axis_0 = const()[name = tensor<string, []>("op_489_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_7 = split(axis = var_489_axis_0, split_sizes = tile_8, x = var_467_cast_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_480_cast_fp16_0, var_470_cast_fp16_0))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_480_cast_fp16_1, var_470_cast_fp16_1))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_480_cast_fp16_2, var_470_cast_fp16_2))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_480_cast_fp16_3, var_470_cast_fp16_3))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_480_cast_fp16_4, var_470_cast_fp16_4))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_480_cast_fp16_5, var_470_cast_fp16_5))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_480_cast_fp16_6, var_470_cast_fp16_6))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_480_cast_fp16_7, var_470_cast_fp16_7))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_514_cast_fp16 = softmax(axis = var_418, x = aw_33_cast_fp16)[name = tensor<string, []>("op_514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_515_cast_fp16 = softmax(axis = var_418, x = aw_35_cast_fp16)[name = tensor<string, []>("op_515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_516_cast_fp16 = softmax(axis = var_418, x = aw_37_cast_fp16)[name = tensor<string, []>("op_516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_517_cast_fp16 = softmax(axis = var_418, x = aw_39_cast_fp16)[name = tensor<string, []>("op_517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_518_cast_fp16 = softmax(axis = var_418, x = aw_41_cast_fp16)[name = tensor<string, []>("op_518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_519_cast_fp16 = softmax(axis = var_418, x = aw_43_cast_fp16)[name = tensor<string, []>("op_519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_520_cast_fp16 = softmax(axis = var_418, x = aw_45_cast_fp16)[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_521_cast_fp16 = softmax(axis = var_418, x = aw_47_cast_fp16)[name = tensor<string, []>("op_521_cast_fp16")];
+            tensor<string, []> var_523_equation_0 = const()[name = tensor<string, []>("op_523_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_489_cast_fp16_0, var_514_cast_fp16))[name = tensor<string, []>("op_523_cast_fp16")];
+            tensor<string, []> var_525_equation_0 = const()[name = tensor<string, []>("op_525_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_525_cast_fp16 = einsum(equation = var_525_equation_0, values = (var_489_cast_fp16_1, var_515_cast_fp16))[name = tensor<string, []>("op_525_cast_fp16")];
+            tensor<string, []> var_527_equation_0 = const()[name = tensor<string, []>("op_527_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_489_cast_fp16_2, var_516_cast_fp16))[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<string, []> var_529_equation_0 = const()[name = tensor<string, []>("op_529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_529_cast_fp16 = einsum(equation = var_529_equation_0, values = (var_489_cast_fp16_3, var_517_cast_fp16))[name = tensor<string, []>("op_529_cast_fp16")];
+            tensor<string, []> var_531_equation_0 = const()[name = tensor<string, []>("op_531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_489_cast_fp16_4, var_518_cast_fp16))[name = tensor<string, []>("op_531_cast_fp16")];
+            tensor<string, []> var_533_equation_0 = const()[name = tensor<string, []>("op_533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_533_cast_fp16 = einsum(equation = var_533_equation_0, values = (var_489_cast_fp16_5, var_519_cast_fp16))[name = tensor<string, []>("op_533_cast_fp16")];
+            tensor<string, []> var_535_equation_0 = const()[name = tensor<string, []>("op_535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_489_cast_fp16_6, var_520_cast_fp16))[name = tensor<string, []>("op_535_cast_fp16")];
+            tensor<string, []> var_537_equation_0 = const()[name = tensor<string, []>("op_537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_537_cast_fp16 = einsum(equation = var_537_equation_0, values = (var_489_cast_fp16_7, var_521_cast_fp16))[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_25_cast_fp16 = concat(axis = var_418, interleave = input_25_interleave_0, values = (var_523_cast_fp16, var_525_cast_fp16, var_527_cast_fp16, var_529_cast_fp16, var_531_cast_fp16, var_533_cast_fp16, var_535_cast_fp16, var_537_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_546_pad_type_0 = const()[name = tensor<string, []>("op_546_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_546_strides_0 = const()[name = tensor<string, []>("op_546_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_546_pad_0 = const()[name = tensor<string, []>("op_546_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_546_dilations_0 = const()[name = tensor<string, []>("op_546_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_546_groups_0 = const()[name = tensor<string, []>("op_546_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17543872)))];
+            tensor<fp16, [512]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18068224)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_546_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_546_dilations_0, groups = var_546_groups_0, pad = var_546_pad_0, pad_type = var_546_pad_type_0, strides = var_546_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_546_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18069312)))];
+            tensor<fp16, [512]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18070400)))];
+            tensor<fp16, []> var_556_to_fp16 = const()[name = tensor<string, []>("op_556_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_556_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18071488)))];
+            tensor<fp16, [2048]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20168704)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_582_pad_type_0 = const()[name = tensor<string, []>("op_582_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_582_strides_0 = const()[name = tensor<string, []>("op_582_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_582_pad_0 = const()[name = tensor<string, []>("op_582_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_582_dilations_0 = const()[name = tensor<string, []>("op_582_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_582_groups_0 = const()[name = tensor<string, []>("op_582_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20172864)))];
+            tensor<fp16, [512]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22270080)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_582_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_582_dilations_0, groups = var_582_groups_0, pad = var_582_pad_0, pad_type = var_582_pad_type_0, strides = var_582_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_582_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_591 = const()[name = tensor<string, []>("op_591"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22271168)))];
+            tensor<fp16, [512]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22272256)))];
+            tensor<fp16, []> var_607_to_fp16 = const()[name = tensor<string, []>("op_607_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_607_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_642_weight_0_to_fp16 = const()[name = tensor<string, []>("op_642_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22273344)))];
+            tensor<fp16, [512]> var_642_bias_0_to_fp16 = const()[name = tensor<string, []>("op_642_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22797696)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_642_cast_fp16 = conv(bias = var_642_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_642_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_642_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22798784)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_640_pad_type_0 = const()[name = tensor<string, []>("op_640_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_640_strides_0 = const()[name = tensor<string, []>("op_640_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_640_pad_0 = const()[name = tensor<string, []>("op_640_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_640_dilations_0 = const()[name = tensor<string, []>("op_640_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_640_groups_0 = const()[name = tensor<string, []>("op_640_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23323136)))];
+            tensor<fp16, [512]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23847488)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_640_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_640_dilations_0, groups = var_640_groups_0, pad = var_640_pad_0, pad_type = var_640_pad_type_0, strides = var_640_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_640_cast_fp16")];
+            tensor<int32, [8]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_643_axis_0 = const()[name = tensor<string, []>("op_643_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_7 = split(axis = var_643_axis_0, split_sizes = tile_9, x = var_642_cast_fp16)[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<int32, [4]> var_652_perm_0 = const()[name = tensor<string, []>("op_652_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_653_axis_0 = const()[name = tensor<string, []>("op_653_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_652_cast_fp16 = transpose(perm = var_652_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_7 = split(axis = var_653_axis_0, split_sizes = tile_10, x = var_652_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<int32, [8]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_662_axis_0 = const()[name = tensor<string, []>("op_662_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_7 = split(axis = var_662_axis_0, split_sizes = tile_11, x = var_640_cast_fp16)[name = tensor<string, []>("op_662_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_653_cast_fp16_0, var_643_cast_fp16_0))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_653_cast_fp16_1, var_643_cast_fp16_1))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_653_cast_fp16_2, var_643_cast_fp16_2))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_653_cast_fp16_3, var_643_cast_fp16_3))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_653_cast_fp16_4, var_643_cast_fp16_4))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_653_cast_fp16_5, var_643_cast_fp16_5))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_653_cast_fp16_6, var_643_cast_fp16_6))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_653_cast_fp16_7, var_643_cast_fp16_7))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_687_cast_fp16 = softmax(axis = var_591, x = aw_49_cast_fp16)[name = tensor<string, []>("op_687_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_688_cast_fp16 = softmax(axis = var_591, x = aw_51_cast_fp16)[name = tensor<string, []>("op_688_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_689_cast_fp16 = softmax(axis = var_591, x = aw_53_cast_fp16)[name = tensor<string, []>("op_689_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_690_cast_fp16 = softmax(axis = var_591, x = aw_55_cast_fp16)[name = tensor<string, []>("op_690_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_691_cast_fp16 = softmax(axis = var_591, x = aw_57_cast_fp16)[name = tensor<string, []>("op_691_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_692_cast_fp16 = softmax(axis = var_591, x = aw_59_cast_fp16)[name = tensor<string, []>("op_692_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_693_cast_fp16 = softmax(axis = var_591, x = aw_61_cast_fp16)[name = tensor<string, []>("op_693_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_694_cast_fp16 = softmax(axis = var_591, x = aw_63_cast_fp16)[name = tensor<string, []>("op_694_cast_fp16")];
+            tensor<string, []> var_696_equation_0 = const()[name = tensor<string, []>("op_696_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_696_cast_fp16 = einsum(equation = var_696_equation_0, values = (var_662_cast_fp16_0, var_687_cast_fp16))[name = tensor<string, []>("op_696_cast_fp16")];
+            tensor<string, []> var_698_equation_0 = const()[name = tensor<string, []>("op_698_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_662_cast_fp16_1, var_688_cast_fp16))[name = tensor<string, []>("op_698_cast_fp16")];
+            tensor<string, []> var_700_equation_0 = const()[name = tensor<string, []>("op_700_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_700_cast_fp16 = einsum(equation = var_700_equation_0, values = (var_662_cast_fp16_2, var_689_cast_fp16))[name = tensor<string, []>("op_700_cast_fp16")];
+            tensor<string, []> var_702_equation_0 = const()[name = tensor<string, []>("op_702_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_662_cast_fp16_3, var_690_cast_fp16))[name = tensor<string, []>("op_702_cast_fp16")];
+            tensor<string, []> var_704_equation_0 = const()[name = tensor<string, []>("op_704_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_704_cast_fp16 = einsum(equation = var_704_equation_0, values = (var_662_cast_fp16_4, var_691_cast_fp16))[name = tensor<string, []>("op_704_cast_fp16")];
+            tensor<string, []> var_706_equation_0 = const()[name = tensor<string, []>("op_706_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_662_cast_fp16_5, var_692_cast_fp16))[name = tensor<string, []>("op_706_cast_fp16")];
+            tensor<string, []> var_708_equation_0 = const()[name = tensor<string, []>("op_708_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_708_cast_fp16 = einsum(equation = var_708_equation_0, values = (var_662_cast_fp16_6, var_693_cast_fp16))[name = tensor<string, []>("op_708_cast_fp16")];
+            tensor<string, []> var_710_equation_0 = const()[name = tensor<string, []>("op_710_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_662_cast_fp16_7, var_694_cast_fp16))[name = tensor<string, []>("op_710_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_35_cast_fp16 = concat(axis = var_591, interleave = input_35_interleave_0, values = (var_696_cast_fp16, var_698_cast_fp16, var_700_cast_fp16, var_702_cast_fp16, var_704_cast_fp16, var_706_cast_fp16, var_708_cast_fp16, var_710_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_719_pad_type_0 = const()[name = tensor<string, []>("op_719_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_719_strides_0 = const()[name = tensor<string, []>("op_719_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_719_pad_0 = const()[name = tensor<string, []>("op_719_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_719_dilations_0 = const()[name = tensor<string, []>("op_719_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_719_groups_0 = const()[name = tensor<string, []>("op_719_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23848576)))];
+            tensor<fp16, [512]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24372928)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_719_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_719_dilations_0, groups = var_719_groups_0, pad = var_719_pad_0, pad_type = var_719_pad_type_0, strides = var_719_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_719_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_719_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24374016)))];
+            tensor<fp16, [512]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24375104)))];
+            tensor<fp16, []> var_729_to_fp16 = const()[name = tensor<string, []>("op_729_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_729_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24376192)))];
+            tensor<fp16, [2048]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26473408)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_755_pad_type_0 = const()[name = tensor<string, []>("op_755_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_755_strides_0 = const()[name = tensor<string, []>("op_755_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_755_pad_0 = const()[name = tensor<string, []>("op_755_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_755_dilations_0 = const()[name = tensor<string, []>("op_755_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_755_groups_0 = const()[name = tensor<string, []>("op_755_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26477568)))];
+            tensor<fp16, [512]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28574784)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_755_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_755_dilations_0, groups = var_755_groups_0, pad = var_755_pad_0, pad_type = var_755_pad_type_0, strides = var_755_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_755_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_755_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_764 = const()[name = tensor<string, []>("op_764"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28575872)))];
+            tensor<fp16, [512]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28576960)))];
+            tensor<fp16, []> var_780_to_fp16 = const()[name = tensor<string, []>("op_780_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_780_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_815_weight_0_to_fp16 = const()[name = tensor<string, []>("op_815_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28578048)))];
+            tensor<fp16, [512]> var_815_bias_0_to_fp16 = const()[name = tensor<string, []>("op_815_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29102400)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_815_cast_fp16 = conv(bias = var_815_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_815_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29103488)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_813_pad_type_0 = const()[name = tensor<string, []>("op_813_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_813_strides_0 = const()[name = tensor<string, []>("op_813_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_813_pad_0 = const()[name = tensor<string, []>("op_813_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_813_dilations_0 = const()[name = tensor<string, []>("op_813_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_813_groups_0 = const()[name = tensor<string, []>("op_813_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29627840)))];
+            tensor<fp16, [512]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30152192)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_813_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_813_dilations_0, groups = var_813_groups_0, pad = var_813_pad_0, pad_type = var_813_pad_type_0, strides = var_813_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_813_cast_fp16")];
+            tensor<int32, [8]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_816_axis_0 = const()[name = tensor<string, []>("op_816_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_7 = split(axis = var_816_axis_0, split_sizes = tile_12, x = var_815_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<int32, [4]> var_825_perm_0 = const()[name = tensor<string, []>("op_825_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_826_axis_0 = const()[name = tensor<string, []>("op_826_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_825_cast_fp16 = transpose(perm = var_825_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_7 = split(axis = var_826_axis_0, split_sizes = tile_13, x = var_825_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<int32, [8]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_835_axis_0 = const()[name = tensor<string, []>("op_835_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_7 = split(axis = var_835_axis_0, split_sizes = tile_14, x = var_813_cast_fp16)[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_826_cast_fp16_0, var_816_cast_fp16_0))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_826_cast_fp16_1, var_816_cast_fp16_1))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_826_cast_fp16_2, var_816_cast_fp16_2))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_826_cast_fp16_3, var_816_cast_fp16_3))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_826_cast_fp16_4, var_816_cast_fp16_4))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_826_cast_fp16_5, var_816_cast_fp16_5))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_826_cast_fp16_6, var_816_cast_fp16_6))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_826_cast_fp16_7, var_816_cast_fp16_7))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_860_cast_fp16 = softmax(axis = var_764, x = aw_65_cast_fp16)[name = tensor<string, []>("op_860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_861_cast_fp16 = softmax(axis = var_764, x = aw_67_cast_fp16)[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_862_cast_fp16 = softmax(axis = var_764, x = aw_69_cast_fp16)[name = tensor<string, []>("op_862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_863_cast_fp16 = softmax(axis = var_764, x = aw_71_cast_fp16)[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_864_cast_fp16 = softmax(axis = var_764, x = aw_73_cast_fp16)[name = tensor<string, []>("op_864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_865_cast_fp16 = softmax(axis = var_764, x = aw_75_cast_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_866_cast_fp16 = softmax(axis = var_764, x = aw_77_cast_fp16)[name = tensor<string, []>("op_866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_867_cast_fp16 = softmax(axis = var_764, x = aw_79_cast_fp16)[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<string, []> var_869_equation_0 = const()[name = tensor<string, []>("op_869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_869_cast_fp16 = einsum(equation = var_869_equation_0, values = (var_835_cast_fp16_0, var_860_cast_fp16))[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_835_cast_fp16_1, var_861_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<string, []> var_873_equation_0 = const()[name = tensor<string, []>("op_873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_873_cast_fp16 = einsum(equation = var_873_equation_0, values = (var_835_cast_fp16_2, var_862_cast_fp16))[name = tensor<string, []>("op_873_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_835_cast_fp16_3, var_863_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<string, []> var_877_equation_0 = const()[name = tensor<string, []>("op_877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_877_cast_fp16 = einsum(equation = var_877_equation_0, values = (var_835_cast_fp16_4, var_864_cast_fp16))[name = tensor<string, []>("op_877_cast_fp16")];
+            tensor<string, []> var_879_equation_0 = const()[name = tensor<string, []>("op_879_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_879_cast_fp16 = einsum(equation = var_879_equation_0, values = (var_835_cast_fp16_5, var_865_cast_fp16))[name = tensor<string, []>("op_879_cast_fp16")];
+            tensor<string, []> var_881_equation_0 = const()[name = tensor<string, []>("op_881_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_881_cast_fp16 = einsum(equation = var_881_equation_0, values = (var_835_cast_fp16_6, var_866_cast_fp16))[name = tensor<string, []>("op_881_cast_fp16")];
+            tensor<string, []> var_883_equation_0 = const()[name = tensor<string, []>("op_883_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_883_cast_fp16 = einsum(equation = var_883_equation_0, values = (var_835_cast_fp16_7, var_867_cast_fp16))[name = tensor<string, []>("op_883_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_45_cast_fp16 = concat(axis = var_764, interleave = input_45_interleave_0, values = (var_869_cast_fp16, var_871_cast_fp16, var_873_cast_fp16, var_875_cast_fp16, var_877_cast_fp16, var_879_cast_fp16, var_881_cast_fp16, var_883_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_892_pad_type_0 = const()[name = tensor<string, []>("op_892_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_892_strides_0 = const()[name = tensor<string, []>("op_892_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_892_pad_0 = const()[name = tensor<string, []>("op_892_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_892_dilations_0 = const()[name = tensor<string, []>("op_892_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_892_groups_0 = const()[name = tensor<string, []>("op_892_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30153280)))];
+            tensor<fp16, [512]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30677632)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_892_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_892_dilations_0, groups = var_892_groups_0, pad = var_892_pad_0, pad_type = var_892_pad_type_0, strides = var_892_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_892_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_892_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30678720)))];
+            tensor<fp16, [512]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30679808)))];
+            tensor<fp16, []> var_902_to_fp16 = const()[name = tensor<string, []>("op_902_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_902_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30680896)))];
+            tensor<fp16, [2048]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32778112)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_928_pad_type_0 = const()[name = tensor<string, []>("op_928_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_928_strides_0 = const()[name = tensor<string, []>("op_928_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_928_pad_0 = const()[name = tensor<string, []>("op_928_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_928_dilations_0 = const()[name = tensor<string, []>("op_928_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_928_groups_0 = const()[name = tensor<string, []>("op_928_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32782272)))];
+            tensor<fp16, [512]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34879488)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_928_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_928_dilations_0, groups = var_928_groups_0, pad = var_928_pad_0, pad_type = var_928_pad_type_0, strides = var_928_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_928_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_928_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_937 = const()[name = tensor<string, []>("op_937"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34880576)))];
+            tensor<fp16, [512]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34881664)))];
+            tensor<fp16, []> var_953_to_fp16 = const()[name = tensor<string, []>("op_953_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_953_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_988_weight_0_to_fp16 = const()[name = tensor<string, []>("op_988_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34882752)))];
+            tensor<fp16, [512]> var_988_bias_0_to_fp16 = const()[name = tensor<string, []>("op_988_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35407104)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_988_cast_fp16 = conv(bias = var_988_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_988_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35408192)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_986_pad_type_0 = const()[name = tensor<string, []>("op_986_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_986_strides_0 = const()[name = tensor<string, []>("op_986_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_986_pad_0 = const()[name = tensor<string, []>("op_986_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_986_dilations_0 = const()[name = tensor<string, []>("op_986_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_986_groups_0 = const()[name = tensor<string, []>("op_986_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35932544)))];
+            tensor<fp16, [512]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36456896)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_986_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_986_dilations_0, groups = var_986_groups_0, pad = var_986_pad_0, pad_type = var_986_pad_type_0, strides = var_986_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<int32, [8]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_989_axis_0 = const()[name = tensor<string, []>("op_989_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_7 = split(axis = var_989_axis_0, split_sizes = tile_15, x = var_988_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
+            tensor<int32, [4]> var_998_perm_0 = const()[name = tensor<string, []>("op_998_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_999_axis_0 = const()[name = tensor<string, []>("op_999_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_998_cast_fp16 = transpose(perm = var_998_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_7 = split(axis = var_999_axis_0, split_sizes = tile_16, x = var_998_cast_fp16)[name = tensor<string, []>("op_999_cast_fp16")];
+            tensor<int32, [8]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1008_axis_0 = const()[name = tensor<string, []>("op_1008_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_7 = split(axis = var_1008_axis_0, split_sizes = tile_17, x = var_986_cast_fp16)[name = tensor<string, []>("op_1008_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_999_cast_fp16_0, var_989_cast_fp16_0))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_999_cast_fp16_1, var_989_cast_fp16_1))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_999_cast_fp16_2, var_989_cast_fp16_2))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_999_cast_fp16_3, var_989_cast_fp16_3))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_999_cast_fp16_4, var_989_cast_fp16_4))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_999_cast_fp16_5, var_989_cast_fp16_5))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_999_cast_fp16_6, var_989_cast_fp16_6))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_999_cast_fp16_7, var_989_cast_fp16_7))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1033_cast_fp16 = softmax(axis = var_937, x = aw_81_cast_fp16)[name = tensor<string, []>("op_1033_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1034_cast_fp16 = softmax(axis = var_937, x = aw_83_cast_fp16)[name = tensor<string, []>("op_1034_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1035_cast_fp16 = softmax(axis = var_937, x = aw_85_cast_fp16)[name = tensor<string, []>("op_1035_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1036_cast_fp16 = softmax(axis = var_937, x = aw_87_cast_fp16)[name = tensor<string, []>("op_1036_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1037_cast_fp16 = softmax(axis = var_937, x = aw_89_cast_fp16)[name = tensor<string, []>("op_1037_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1038_cast_fp16 = softmax(axis = var_937, x = aw_91_cast_fp16)[name = tensor<string, []>("op_1038_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1039_cast_fp16 = softmax(axis = var_937, x = aw_93_cast_fp16)[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1040_cast_fp16 = softmax(axis = var_937, x = aw_cast_fp16)[name = tensor<string, []>("op_1040_cast_fp16")];
+            tensor<string, []> var_1042_equation_0 = const()[name = tensor<string, []>("op_1042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1042_cast_fp16 = einsum(equation = var_1042_equation_0, values = (var_1008_cast_fp16_0, var_1033_cast_fp16))[name = tensor<string, []>("op_1042_cast_fp16")];
+            tensor<string, []> var_1044_equation_0 = const()[name = tensor<string, []>("op_1044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1044_cast_fp16 = einsum(equation = var_1044_equation_0, values = (var_1008_cast_fp16_1, var_1034_cast_fp16))[name = tensor<string, []>("op_1044_cast_fp16")];
+            tensor<string, []> var_1046_equation_0 = const()[name = tensor<string, []>("op_1046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1046_cast_fp16 = einsum(equation = var_1046_equation_0, values = (var_1008_cast_fp16_2, var_1035_cast_fp16))[name = tensor<string, []>("op_1046_cast_fp16")];
+            tensor<string, []> var_1048_equation_0 = const()[name = tensor<string, []>("op_1048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1048_cast_fp16 = einsum(equation = var_1048_equation_0, values = (var_1008_cast_fp16_3, var_1036_cast_fp16))[name = tensor<string, []>("op_1048_cast_fp16")];
+            tensor<string, []> var_1050_equation_0 = const()[name = tensor<string, []>("op_1050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1050_cast_fp16 = einsum(equation = var_1050_equation_0, values = (var_1008_cast_fp16_4, var_1037_cast_fp16))[name = tensor<string, []>("op_1050_cast_fp16")];
+            tensor<string, []> var_1052_equation_0 = const()[name = tensor<string, []>("op_1052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1052_cast_fp16 = einsum(equation = var_1052_equation_0, values = (var_1008_cast_fp16_5, var_1038_cast_fp16))[name = tensor<string, []>("op_1052_cast_fp16")];
+            tensor<string, []> var_1054_equation_0 = const()[name = tensor<string, []>("op_1054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1054_cast_fp16 = einsum(equation = var_1054_equation_0, values = (var_1008_cast_fp16_6, var_1039_cast_fp16))[name = tensor<string, []>("op_1054_cast_fp16")];
+            tensor<string, []> var_1056_equation_0 = const()[name = tensor<string, []>("op_1056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1056_cast_fp16 = einsum(equation = var_1056_equation_0, values = (var_1008_cast_fp16_7, var_1040_cast_fp16))[name = tensor<string, []>("op_1056_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_55_cast_fp16 = concat(axis = var_937, interleave = input_55_interleave_0, values = (var_1042_cast_fp16, var_1044_cast_fp16, var_1046_cast_fp16, var_1048_cast_fp16, var_1050_cast_fp16, var_1052_cast_fp16, var_1054_cast_fp16, var_1056_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1065_pad_type_0 = const()[name = tensor<string, []>("op_1065_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1065_strides_0 = const()[name = tensor<string, []>("op_1065_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1065_pad_0 = const()[name = tensor<string, []>("op_1065_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1065_dilations_0 = const()[name = tensor<string, []>("op_1065_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1065_groups_0 = const()[name = tensor<string, []>("op_1065_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36457984)))];
+            tensor<fp16, [512]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36982336)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_1065_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1065_dilations_0, groups = var_1065_groups_0, pad = var_1065_pad_0, pad_type = var_1065_pad_type_0, strides = var_1065_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1065_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1065_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36983424)))];
+            tensor<fp16, [512]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36984512)))];
+            tensor<fp16, []> var_1075_to_fp16 = const()[name = tensor<string, []>("op_1075_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1075_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36985600)))];
+            tensor<fp16, [2048]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39082816)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_1101_pad_type_0 = const()[name = tensor<string, []>("op_1101_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1101_strides_0 = const()[name = tensor<string, []>("op_1101_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1101_pad_0 = const()[name = tensor<string, []>("op_1101_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1101_dilations_0 = const()[name = tensor<string, []>("op_1101_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1101_groups_0 = const()[name = tensor<string, []>("op_1101_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39086976)))];
+            tensor<fp16, [512]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41184192)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_1101_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1101_dilations_0, groups = var_1101_groups_0, pad = var_1101_pad_0, pad_type = var_1101_pad_type_0, strides = var_1101_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1101_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41185280)))];
+            tensor<fp16, [512]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41186368)))];
+            tensor<fp16, []> var_1115_to_fp16 = const()[name = tensor<string, []>("op_1115_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_1115_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_1126_axes_0 = const()[name = tensor<string, []>("op_1126_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 512, 1500]> var_1126_cast_fp16 = squeeze(axes = var_1126_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<int32, [3]> var_1129_perm_0 = const()[name = tensor<string, []>("op_1129_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_1129_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_1129_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 512]> var_1129_cast_fp16 = transpose(perm = var_1129_perm_0, x = var_1126_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 512]> output = cast(dtype = var_1129_cast_fp16_to_fp32_dtype_0, x = var_1129_cast_fp16)[name = tensor<string, []>("cast_26")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/base.en/ggml-base.en-encoder.mlmodelc/weights/weight.bin b/base.en/ggml-base.en-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..86cecfd1f63401689bb12a0c1aa24cbb53c0295a
--- /dev/null
+++ b/base.en/ggml-base.en-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ddb14d23b665662c6bb22d659e9078e5304e7c4f346b66d66660b2efc07c461
+size 41187456
diff --git a/base.en/ggml-base.en.bin b/base.en/ggml-base.en.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87c664c563ef3ff52424dd4fa925cf95b306dba6
--- /dev/null
+++ b/base.en/ggml-base.en.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a03779c86df3323075f5e796cb2ce5029f00ec8869eee3fdfb897afe36c6d002
+size 147964211
diff --git a/base/.DS_Store b/base/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..0cccdecbf552204a0804f7b0a3e84b5e24ee345e
Binary files /dev/null and b/base/.DS_Store differ
diff --git a/base/ggml-base-encoder.mlmodelc/analytics/coremldata.bin b/base/ggml-base-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4d3ac25920351891c100fec2447ae7af22cd74a5
--- /dev/null
+++ b/base/ggml-base-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3336d2fa661565cda8254e2605c4a0d14b30cb7166abd534a442d036dc7210b2
+size 243
diff --git a/base/ggml-base-encoder.mlmodelc/coremldata.bin b/base/ggml-base-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4d9a467f60ab78ce0c7da4e8af0ea14dd7b68774
--- /dev/null
+++ b/base/ggml-base-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea2fd8343a60c71e243d887740ea21c619fe40d2bf4b898f3c499df4af43433b
+size 320
diff --git a/base/ggml-base-encoder.mlmodelc/metadata.json b/base/ggml-base-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..de25825ed2d1cde0d2a5e4059d73019674f12f4b
--- /dev/null
+++ b/base/ggml-base-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 512)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 512]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 6,
+      "Gelu" : 8,
+      "LayerNorm" : 13,
+      "Transpose" : 7,
+      "Softmax" : 48,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 13,
+      "Einsum" : 96,
+      "ExpandDims" : 1,
+      "Split" : 18,
+      "Conv" : 38
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_base",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/base/ggml-base-encoder.mlmodelc/model.mil b/base/ggml-base-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..19090e632d6d8b0353d98e50f2756e7d5f930331
--- /dev/null
+++ b/base/ggml-base-encoder.mlmodelc/model.mil
@@ -0,0 +1,733 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_32_pad_type_0 = const()[name = tensor<string, []>("op_32_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_32_pad_0 = const()[name = tensor<string, []>("op_32_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_32_strides_0 = const()[name = tensor<string, []>("op_32_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_32_dilations_0 = const()[name = tensor<string, []>("op_32_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_32_groups_0 = const()[name = tensor<string, []>("op_32_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [512, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [512, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [512]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245888)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_28")];
+            tensor<fp16, [1, 512, 3000]> var_32_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_32_dilations_0, groups = var_32_groups_0, pad = var_32_pad_0, pad_type = var_32_pad_type_0, strides = var_32_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_32_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 512, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_32_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_50_pad_type_0 = const()[name = tensor<string, []>("op_50_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_50_pad_0 = const()[name = tensor<string, []>("op_50_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_50_strides_0 = const()[name = tensor<string, []>("op_50_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_50_dilations_0 = const()[name = tensor<string, []>("op_50_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_50_groups_0 = const()[name = tensor<string, []>("op_50_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [512, 512, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(246976)))];
+            tensor<fp16, [512]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1819904)))];
+            tensor<fp16, [1, 512, 1500]> var_50_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_50_dilations_0, groups = var_50_groups_0, pad = var_50_pad_0, pad_type = var_50_pad_type_0, strides = var_50_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_50_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 512, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_50_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [512, 1500]> var_55_to_fp16 = const()[name = tensor<string, []>("op_55_to_fp16"), val = tensor<fp16, [512, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1820992)))];
+            tensor<fp16, [1, 512, 1500]> var_57_cast_fp16 = add(x = x_3_cast_fp16, y = var_55_to_fp16)[name = tensor<string, []>("op_57_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_57_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_72 = const()[name = tensor<string, []>("op_72"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3357056)))];
+            tensor<fp16, [512]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3358144)))];
+            tensor<fp16, []> var_88_to_fp16 = const()[name = tensor<string, []>("op_88_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_88_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_123_weight_0_to_fp16 = const()[name = tensor<string, []>("op_123_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3359232)))];
+            tensor<fp16, [512]> var_123_bias_0_to_fp16 = const()[name = tensor<string, []>("op_123_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3883584)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_123_cast_fp16 = conv(bias = var_123_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_123_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_123_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3884672)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_121_pad_type_0 = const()[name = tensor<string, []>("op_121_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_121_strides_0 = const()[name = tensor<string, []>("op_121_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_121_pad_0 = const()[name = tensor<string, []>("op_121_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_121_dilations_0 = const()[name = tensor<string, []>("op_121_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_121_groups_0 = const()[name = tensor<string, []>("op_121_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4409024)))];
+            tensor<fp16, [512]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4933376)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_121_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_121_dilations_0, groups = var_121_groups_0, pad = var_121_pad_0, pad_type = var_121_pad_type_0, strides = var_121_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_121_cast_fp16")];
+            tensor<int32, [8]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_124_axis_0 = const()[name = tensor<string, []>("op_124_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_124_cast_fp16_7 = split(axis = var_124_axis_0, split_sizes = tile_0, x = var_123_cast_fp16)[name = tensor<string, []>("op_124_cast_fp16")];
+            tensor<int32, [4]> var_133_perm_0 = const()[name = tensor<string, []>("op_133_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_134_axis_0 = const()[name = tensor<string, []>("op_134_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_133_cast_fp16 = transpose(perm = var_133_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_134_cast_fp16_7 = split(axis = var_134_axis_0, split_sizes = tile_1, x = var_133_cast_fp16)[name = tensor<string, []>("op_134_cast_fp16")];
+            tensor<int32, [8]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_143_axis_0 = const()[name = tensor<string, []>("op_143_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_143_cast_fp16_7 = split(axis = var_143_axis_0, split_sizes = tile_2, x = var_121_cast_fp16)[name = tensor<string, []>("op_143_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_134_cast_fp16_0, var_124_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_134_cast_fp16_1, var_124_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_134_cast_fp16_2, var_124_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_134_cast_fp16_3, var_124_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_134_cast_fp16_4, var_124_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_134_cast_fp16_5, var_124_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_134_cast_fp16_6, var_124_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_134_cast_fp16_7, var_124_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_168_cast_fp16 = softmax(axis = var_72, x = aw_1_cast_fp16)[name = tensor<string, []>("op_168_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_169_cast_fp16 = softmax(axis = var_72, x = aw_3_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_170_cast_fp16 = softmax(axis = var_72, x = aw_5_cast_fp16)[name = tensor<string, []>("op_170_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_171_cast_fp16 = softmax(axis = var_72, x = aw_7_cast_fp16)[name = tensor<string, []>("op_171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_172_cast_fp16 = softmax(axis = var_72, x = aw_9_cast_fp16)[name = tensor<string, []>("op_172_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_173_cast_fp16 = softmax(axis = var_72, x = aw_11_cast_fp16)[name = tensor<string, []>("op_173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_174_cast_fp16 = softmax(axis = var_72, x = aw_13_cast_fp16)[name = tensor<string, []>("op_174_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_175_cast_fp16 = softmax(axis = var_72, x = aw_15_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<string, []> var_177_equation_0 = const()[name = tensor<string, []>("op_177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_177_cast_fp16 = einsum(equation = var_177_equation_0, values = (var_143_cast_fp16_0, var_168_cast_fp16))[name = tensor<string, []>("op_177_cast_fp16")];
+            tensor<string, []> var_179_equation_0 = const()[name = tensor<string, []>("op_179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_179_cast_fp16 = einsum(equation = var_179_equation_0, values = (var_143_cast_fp16_1, var_169_cast_fp16))[name = tensor<string, []>("op_179_cast_fp16")];
+            tensor<string, []> var_181_equation_0 = const()[name = tensor<string, []>("op_181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_181_cast_fp16 = einsum(equation = var_181_equation_0, values = (var_143_cast_fp16_2, var_170_cast_fp16))[name = tensor<string, []>("op_181_cast_fp16")];
+            tensor<string, []> var_183_equation_0 = const()[name = tensor<string, []>("op_183_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_183_cast_fp16 = einsum(equation = var_183_equation_0, values = (var_143_cast_fp16_3, var_171_cast_fp16))[name = tensor<string, []>("op_183_cast_fp16")];
+            tensor<string, []> var_185_equation_0 = const()[name = tensor<string, []>("op_185_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_185_cast_fp16 = einsum(equation = var_185_equation_0, values = (var_143_cast_fp16_4, var_172_cast_fp16))[name = tensor<string, []>("op_185_cast_fp16")];
+            tensor<string, []> var_187_equation_0 = const()[name = tensor<string, []>("op_187_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_187_cast_fp16 = einsum(equation = var_187_equation_0, values = (var_143_cast_fp16_5, var_173_cast_fp16))[name = tensor<string, []>("op_187_cast_fp16")];
+            tensor<string, []> var_189_equation_0 = const()[name = tensor<string, []>("op_189_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_189_cast_fp16 = einsum(equation = var_189_equation_0, values = (var_143_cast_fp16_6, var_174_cast_fp16))[name = tensor<string, []>("op_189_cast_fp16")];
+            tensor<string, []> var_191_equation_0 = const()[name = tensor<string, []>("op_191_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_191_cast_fp16 = einsum(equation = var_191_equation_0, values = (var_143_cast_fp16_7, var_175_cast_fp16))[name = tensor<string, []>("op_191_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_5_cast_fp16 = concat(axis = var_72, interleave = input_5_interleave_0, values = (var_177_cast_fp16, var_179_cast_fp16, var_181_cast_fp16, var_183_cast_fp16, var_185_cast_fp16, var_187_cast_fp16, var_189_cast_fp16, var_191_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_200_pad_type_0 = const()[name = tensor<string, []>("op_200_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_200_strides_0 = const()[name = tensor<string, []>("op_200_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_200_pad_0 = const()[name = tensor<string, []>("op_200_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_200_dilations_0 = const()[name = tensor<string, []>("op_200_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_200_groups_0 = const()[name = tensor<string, []>("op_200_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4934464)))];
+            tensor<fp16, [512]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5458816)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_200_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_200_dilations_0, groups = var_200_groups_0, pad = var_200_pad_0, pad_type = var_200_pad_type_0, strides = var_200_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_200_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5459904)))];
+            tensor<fp16, [512]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5460992)))];
+            tensor<fp16, []> var_210_to_fp16 = const()[name = tensor<string, []>("op_210_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_210_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5462080)))];
+            tensor<fp16, [2048]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7559296)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_236_pad_type_0 = const()[name = tensor<string, []>("op_236_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_236_strides_0 = const()[name = tensor<string, []>("op_236_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_236_pad_0 = const()[name = tensor<string, []>("op_236_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_236_dilations_0 = const()[name = tensor<string, []>("op_236_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_236_groups_0 = const()[name = tensor<string, []>("op_236_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7563456)))];
+            tensor<fp16, [512]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9660672)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_236_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_236_dilations_0, groups = var_236_groups_0, pad = var_236_pad_0, pad_type = var_236_pad_type_0, strides = var_236_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_236_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_236_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_245 = const()[name = tensor<string, []>("op_245"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9661760)))];
+            tensor<fp16, [512]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9662848)))];
+            tensor<fp16, []> var_261_to_fp16 = const()[name = tensor<string, []>("op_261_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_261_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_296_weight_0_to_fp16 = const()[name = tensor<string, []>("op_296_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9663936)))];
+            tensor<fp16, [512]> var_296_bias_0_to_fp16 = const()[name = tensor<string, []>("op_296_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10188288)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_296_cast_fp16 = conv(bias = var_296_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_296_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_296_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10189376)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_294_pad_type_0 = const()[name = tensor<string, []>("op_294_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_294_strides_0 = const()[name = tensor<string, []>("op_294_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_294_pad_0 = const()[name = tensor<string, []>("op_294_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_294_dilations_0 = const()[name = tensor<string, []>("op_294_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_294_groups_0 = const()[name = tensor<string, []>("op_294_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10713728)))];
+            tensor<fp16, [512]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11238080)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_294_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_294_dilations_0, groups = var_294_groups_0, pad = var_294_pad_0, pad_type = var_294_pad_type_0, strides = var_294_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<int32, [8]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_297_axis_0 = const()[name = tensor<string, []>("op_297_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_297_cast_fp16_7 = split(axis = var_297_axis_0, split_sizes = tile_3, x = var_296_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<int32, [4]> var_306_perm_0 = const()[name = tensor<string, []>("op_306_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_307_axis_0 = const()[name = tensor<string, []>("op_307_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_306_cast_fp16 = transpose(perm = var_306_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_307_cast_fp16_7 = split(axis = var_307_axis_0, split_sizes = tile_4, x = var_306_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<int32, [8]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_316_axis_0 = const()[name = tensor<string, []>("op_316_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_316_cast_fp16_7 = split(axis = var_316_axis_0, split_sizes = tile_5, x = var_294_cast_fp16)[name = tensor<string, []>("op_316_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_307_cast_fp16_0, var_297_cast_fp16_0))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_307_cast_fp16_1, var_297_cast_fp16_1))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_307_cast_fp16_2, var_297_cast_fp16_2))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_307_cast_fp16_3, var_297_cast_fp16_3))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_307_cast_fp16_4, var_297_cast_fp16_4))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_307_cast_fp16_5, var_297_cast_fp16_5))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_307_cast_fp16_6, var_297_cast_fp16_6))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_307_cast_fp16_7, var_297_cast_fp16_7))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_341_cast_fp16 = softmax(axis = var_245, x = aw_17_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_342_cast_fp16 = softmax(axis = var_245, x = aw_19_cast_fp16)[name = tensor<string, []>("op_342_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_343_cast_fp16 = softmax(axis = var_245, x = aw_21_cast_fp16)[name = tensor<string, []>("op_343_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_344_cast_fp16 = softmax(axis = var_245, x = aw_23_cast_fp16)[name = tensor<string, []>("op_344_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_345_cast_fp16 = softmax(axis = var_245, x = aw_25_cast_fp16)[name = tensor<string, []>("op_345_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_346_cast_fp16 = softmax(axis = var_245, x = aw_27_cast_fp16)[name = tensor<string, []>("op_346_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_347_cast_fp16 = softmax(axis = var_245, x = aw_29_cast_fp16)[name = tensor<string, []>("op_347_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_348_cast_fp16 = softmax(axis = var_245, x = aw_31_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<string, []> var_350_equation_0 = const()[name = tensor<string, []>("op_350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_350_cast_fp16 = einsum(equation = var_350_equation_0, values = (var_316_cast_fp16_0, var_341_cast_fp16))[name = tensor<string, []>("op_350_cast_fp16")];
+            tensor<string, []> var_352_equation_0 = const()[name = tensor<string, []>("op_352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_352_cast_fp16 = einsum(equation = var_352_equation_0, values = (var_316_cast_fp16_1, var_342_cast_fp16))[name = tensor<string, []>("op_352_cast_fp16")];
+            tensor<string, []> var_354_equation_0 = const()[name = tensor<string, []>("op_354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_354_cast_fp16 = einsum(equation = var_354_equation_0, values = (var_316_cast_fp16_2, var_343_cast_fp16))[name = tensor<string, []>("op_354_cast_fp16")];
+            tensor<string, []> var_356_equation_0 = const()[name = tensor<string, []>("op_356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_356_cast_fp16 = einsum(equation = var_356_equation_0, values = (var_316_cast_fp16_3, var_344_cast_fp16))[name = tensor<string, []>("op_356_cast_fp16")];
+            tensor<string, []> var_358_equation_0 = const()[name = tensor<string, []>("op_358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_358_cast_fp16 = einsum(equation = var_358_equation_0, values = (var_316_cast_fp16_4, var_345_cast_fp16))[name = tensor<string, []>("op_358_cast_fp16")];
+            tensor<string, []> var_360_equation_0 = const()[name = tensor<string, []>("op_360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_360_cast_fp16 = einsum(equation = var_360_equation_0, values = (var_316_cast_fp16_5, var_346_cast_fp16))[name = tensor<string, []>("op_360_cast_fp16")];
+            tensor<string, []> var_362_equation_0 = const()[name = tensor<string, []>("op_362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_362_cast_fp16 = einsum(equation = var_362_equation_0, values = (var_316_cast_fp16_6, var_347_cast_fp16))[name = tensor<string, []>("op_362_cast_fp16")];
+            tensor<string, []> var_364_equation_0 = const()[name = tensor<string, []>("op_364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_364_cast_fp16 = einsum(equation = var_364_equation_0, values = (var_316_cast_fp16_7, var_348_cast_fp16))[name = tensor<string, []>("op_364_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_15_cast_fp16 = concat(axis = var_245, interleave = input_15_interleave_0, values = (var_350_cast_fp16, var_352_cast_fp16, var_354_cast_fp16, var_356_cast_fp16, var_358_cast_fp16, var_360_cast_fp16, var_362_cast_fp16, var_364_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_373_pad_type_0 = const()[name = tensor<string, []>("op_373_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = tensor<string, []>("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = tensor<string, []>("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = tensor<string, []>("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_373_groups_0 = const()[name = tensor<string, []>("op_373_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11239168)))];
+            tensor<fp16, [512]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11763520)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_373_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_373_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11764608)))];
+            tensor<fp16, [512]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11765696)))];
+            tensor<fp16, []> var_383_to_fp16 = const()[name = tensor<string, []>("op_383_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_383_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11766784)))];
+            tensor<fp16, [2048]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13864000)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_409_pad_type_0 = const()[name = tensor<string, []>("op_409_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_409_strides_0 = const()[name = tensor<string, []>("op_409_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_409_pad_0 = const()[name = tensor<string, []>("op_409_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_409_dilations_0 = const()[name = tensor<string, []>("op_409_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_409_groups_0 = const()[name = tensor<string, []>("op_409_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13868160)))];
+            tensor<fp16, [512]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15965376)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_409_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_409_dilations_0, groups = var_409_groups_0, pad = var_409_pad_0, pad_type = var_409_pad_type_0, strides = var_409_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_409_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_418 = const()[name = tensor<string, []>("op_418"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15966464)))];
+            tensor<fp16, [512]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15967552)))];
+            tensor<fp16, []> var_434_to_fp16 = const()[name = tensor<string, []>("op_434_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_434_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_469_weight_0_to_fp16 = const()[name = tensor<string, []>("op_469_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15968640)))];
+            tensor<fp16, [512]> var_469_bias_0_to_fp16 = const()[name = tensor<string, []>("op_469_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16492992)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_469_cast_fp16 = conv(bias = var_469_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_469_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16494080)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_467_pad_type_0 = const()[name = tensor<string, []>("op_467_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_467_strides_0 = const()[name = tensor<string, []>("op_467_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_467_pad_0 = const()[name = tensor<string, []>("op_467_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_467_dilations_0 = const()[name = tensor<string, []>("op_467_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_467_groups_0 = const()[name = tensor<string, []>("op_467_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17018432)))];
+            tensor<fp16, [512]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17542784)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_467_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_467_dilations_0, groups = var_467_groups_0, pad = var_467_pad_0, pad_type = var_467_pad_type_0, strides = var_467_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
+            tensor<int32, [8]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_470_axis_0 = const()[name = tensor<string, []>("op_470_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_470_cast_fp16_7 = split(axis = var_470_axis_0, split_sizes = tile_6, x = var_469_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
+            tensor<int32, [4]> var_479_perm_0 = const()[name = tensor<string, []>("op_479_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_480_axis_0 = const()[name = tensor<string, []>("op_480_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_479_cast_fp16 = transpose(perm = var_479_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_480_cast_fp16_7 = split(axis = var_480_axis_0, split_sizes = tile_7, x = var_479_cast_fp16)[name = tensor<string, []>("op_480_cast_fp16")];
+            tensor<int32, [8]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_489_axis_0 = const()[name = tensor<string, []>("op_489_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_489_cast_fp16_7 = split(axis = var_489_axis_0, split_sizes = tile_8, x = var_467_cast_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_480_cast_fp16_0, var_470_cast_fp16_0))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_480_cast_fp16_1, var_470_cast_fp16_1))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_480_cast_fp16_2, var_470_cast_fp16_2))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_480_cast_fp16_3, var_470_cast_fp16_3))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_480_cast_fp16_4, var_470_cast_fp16_4))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_480_cast_fp16_5, var_470_cast_fp16_5))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_480_cast_fp16_6, var_470_cast_fp16_6))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_480_cast_fp16_7, var_470_cast_fp16_7))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_514_cast_fp16 = softmax(axis = var_418, x = aw_33_cast_fp16)[name = tensor<string, []>("op_514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_515_cast_fp16 = softmax(axis = var_418, x = aw_35_cast_fp16)[name = tensor<string, []>("op_515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_516_cast_fp16 = softmax(axis = var_418, x = aw_37_cast_fp16)[name = tensor<string, []>("op_516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_517_cast_fp16 = softmax(axis = var_418, x = aw_39_cast_fp16)[name = tensor<string, []>("op_517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_518_cast_fp16 = softmax(axis = var_418, x = aw_41_cast_fp16)[name = tensor<string, []>("op_518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_519_cast_fp16 = softmax(axis = var_418, x = aw_43_cast_fp16)[name = tensor<string, []>("op_519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_520_cast_fp16 = softmax(axis = var_418, x = aw_45_cast_fp16)[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_521_cast_fp16 = softmax(axis = var_418, x = aw_47_cast_fp16)[name = tensor<string, []>("op_521_cast_fp16")];
+            tensor<string, []> var_523_equation_0 = const()[name = tensor<string, []>("op_523_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_489_cast_fp16_0, var_514_cast_fp16))[name = tensor<string, []>("op_523_cast_fp16")];
+            tensor<string, []> var_525_equation_0 = const()[name = tensor<string, []>("op_525_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_525_cast_fp16 = einsum(equation = var_525_equation_0, values = (var_489_cast_fp16_1, var_515_cast_fp16))[name = tensor<string, []>("op_525_cast_fp16")];
+            tensor<string, []> var_527_equation_0 = const()[name = tensor<string, []>("op_527_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_489_cast_fp16_2, var_516_cast_fp16))[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<string, []> var_529_equation_0 = const()[name = tensor<string, []>("op_529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_529_cast_fp16 = einsum(equation = var_529_equation_0, values = (var_489_cast_fp16_3, var_517_cast_fp16))[name = tensor<string, []>("op_529_cast_fp16")];
+            tensor<string, []> var_531_equation_0 = const()[name = tensor<string, []>("op_531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_489_cast_fp16_4, var_518_cast_fp16))[name = tensor<string, []>("op_531_cast_fp16")];
+            tensor<string, []> var_533_equation_0 = const()[name = tensor<string, []>("op_533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_533_cast_fp16 = einsum(equation = var_533_equation_0, values = (var_489_cast_fp16_5, var_519_cast_fp16))[name = tensor<string, []>("op_533_cast_fp16")];
+            tensor<string, []> var_535_equation_0 = const()[name = tensor<string, []>("op_535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_489_cast_fp16_6, var_520_cast_fp16))[name = tensor<string, []>("op_535_cast_fp16")];
+            tensor<string, []> var_537_equation_0 = const()[name = tensor<string, []>("op_537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_537_cast_fp16 = einsum(equation = var_537_equation_0, values = (var_489_cast_fp16_7, var_521_cast_fp16))[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_25_cast_fp16 = concat(axis = var_418, interleave = input_25_interleave_0, values = (var_523_cast_fp16, var_525_cast_fp16, var_527_cast_fp16, var_529_cast_fp16, var_531_cast_fp16, var_533_cast_fp16, var_535_cast_fp16, var_537_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_546_pad_type_0 = const()[name = tensor<string, []>("op_546_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_546_strides_0 = const()[name = tensor<string, []>("op_546_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_546_pad_0 = const()[name = tensor<string, []>("op_546_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_546_dilations_0 = const()[name = tensor<string, []>("op_546_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_546_groups_0 = const()[name = tensor<string, []>("op_546_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17543872)))];
+            tensor<fp16, [512]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18068224)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_546_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_546_dilations_0, groups = var_546_groups_0, pad = var_546_pad_0, pad_type = var_546_pad_type_0, strides = var_546_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_546_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18069312)))];
+            tensor<fp16, [512]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18070400)))];
+            tensor<fp16, []> var_556_to_fp16 = const()[name = tensor<string, []>("op_556_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_556_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18071488)))];
+            tensor<fp16, [2048]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20168704)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_582_pad_type_0 = const()[name = tensor<string, []>("op_582_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_582_strides_0 = const()[name = tensor<string, []>("op_582_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_582_pad_0 = const()[name = tensor<string, []>("op_582_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_582_dilations_0 = const()[name = tensor<string, []>("op_582_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_582_groups_0 = const()[name = tensor<string, []>("op_582_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20172864)))];
+            tensor<fp16, [512]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22270080)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_582_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_582_dilations_0, groups = var_582_groups_0, pad = var_582_pad_0, pad_type = var_582_pad_type_0, strides = var_582_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_582_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_591 = const()[name = tensor<string, []>("op_591"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22271168)))];
+            tensor<fp16, [512]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22272256)))];
+            tensor<fp16, []> var_607_to_fp16 = const()[name = tensor<string, []>("op_607_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_607_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_642_weight_0_to_fp16 = const()[name = tensor<string, []>("op_642_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22273344)))];
+            tensor<fp16, [512]> var_642_bias_0_to_fp16 = const()[name = tensor<string, []>("op_642_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22797696)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_642_cast_fp16 = conv(bias = var_642_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_642_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_642_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22798784)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_640_pad_type_0 = const()[name = tensor<string, []>("op_640_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_640_strides_0 = const()[name = tensor<string, []>("op_640_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_640_pad_0 = const()[name = tensor<string, []>("op_640_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_640_dilations_0 = const()[name = tensor<string, []>("op_640_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_640_groups_0 = const()[name = tensor<string, []>("op_640_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23323136)))];
+            tensor<fp16, [512]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23847488)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_640_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_640_dilations_0, groups = var_640_groups_0, pad = var_640_pad_0, pad_type = var_640_pad_type_0, strides = var_640_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_640_cast_fp16")];
+            tensor<int32, [8]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_643_axis_0 = const()[name = tensor<string, []>("op_643_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16_7 = split(axis = var_643_axis_0, split_sizes = tile_9, x = var_642_cast_fp16)[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<int32, [4]> var_652_perm_0 = const()[name = tensor<string, []>("op_652_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_653_axis_0 = const()[name = tensor<string, []>("op_653_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_652_cast_fp16 = transpose(perm = var_652_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_653_cast_fp16_7 = split(axis = var_653_axis_0, split_sizes = tile_10, x = var_652_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<int32, [8]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_662_axis_0 = const()[name = tensor<string, []>("op_662_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_662_cast_fp16_7 = split(axis = var_662_axis_0, split_sizes = tile_11, x = var_640_cast_fp16)[name = tensor<string, []>("op_662_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_653_cast_fp16_0, var_643_cast_fp16_0))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_653_cast_fp16_1, var_643_cast_fp16_1))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_653_cast_fp16_2, var_643_cast_fp16_2))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_653_cast_fp16_3, var_643_cast_fp16_3))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_653_cast_fp16_4, var_643_cast_fp16_4))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_653_cast_fp16_5, var_643_cast_fp16_5))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_653_cast_fp16_6, var_643_cast_fp16_6))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_653_cast_fp16_7, var_643_cast_fp16_7))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_687_cast_fp16 = softmax(axis = var_591, x = aw_49_cast_fp16)[name = tensor<string, []>("op_687_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_688_cast_fp16 = softmax(axis = var_591, x = aw_51_cast_fp16)[name = tensor<string, []>("op_688_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_689_cast_fp16 = softmax(axis = var_591, x = aw_53_cast_fp16)[name = tensor<string, []>("op_689_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_690_cast_fp16 = softmax(axis = var_591, x = aw_55_cast_fp16)[name = tensor<string, []>("op_690_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_691_cast_fp16 = softmax(axis = var_591, x = aw_57_cast_fp16)[name = tensor<string, []>("op_691_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_692_cast_fp16 = softmax(axis = var_591, x = aw_59_cast_fp16)[name = tensor<string, []>("op_692_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_693_cast_fp16 = softmax(axis = var_591, x = aw_61_cast_fp16)[name = tensor<string, []>("op_693_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_694_cast_fp16 = softmax(axis = var_591, x = aw_63_cast_fp16)[name = tensor<string, []>("op_694_cast_fp16")];
+            tensor<string, []> var_696_equation_0 = const()[name = tensor<string, []>("op_696_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_696_cast_fp16 = einsum(equation = var_696_equation_0, values = (var_662_cast_fp16_0, var_687_cast_fp16))[name = tensor<string, []>("op_696_cast_fp16")];
+            tensor<string, []> var_698_equation_0 = const()[name = tensor<string, []>("op_698_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_698_cast_fp16 = einsum(equation = var_698_equation_0, values = (var_662_cast_fp16_1, var_688_cast_fp16))[name = tensor<string, []>("op_698_cast_fp16")];
+            tensor<string, []> var_700_equation_0 = const()[name = tensor<string, []>("op_700_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_700_cast_fp16 = einsum(equation = var_700_equation_0, values = (var_662_cast_fp16_2, var_689_cast_fp16))[name = tensor<string, []>("op_700_cast_fp16")];
+            tensor<string, []> var_702_equation_0 = const()[name = tensor<string, []>("op_702_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_702_cast_fp16 = einsum(equation = var_702_equation_0, values = (var_662_cast_fp16_3, var_690_cast_fp16))[name = tensor<string, []>("op_702_cast_fp16")];
+            tensor<string, []> var_704_equation_0 = const()[name = tensor<string, []>("op_704_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_704_cast_fp16 = einsum(equation = var_704_equation_0, values = (var_662_cast_fp16_4, var_691_cast_fp16))[name = tensor<string, []>("op_704_cast_fp16")];
+            tensor<string, []> var_706_equation_0 = const()[name = tensor<string, []>("op_706_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_706_cast_fp16 = einsum(equation = var_706_equation_0, values = (var_662_cast_fp16_5, var_692_cast_fp16))[name = tensor<string, []>("op_706_cast_fp16")];
+            tensor<string, []> var_708_equation_0 = const()[name = tensor<string, []>("op_708_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_708_cast_fp16 = einsum(equation = var_708_equation_0, values = (var_662_cast_fp16_6, var_693_cast_fp16))[name = tensor<string, []>("op_708_cast_fp16")];
+            tensor<string, []> var_710_equation_0 = const()[name = tensor<string, []>("op_710_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_710_cast_fp16 = einsum(equation = var_710_equation_0, values = (var_662_cast_fp16_7, var_694_cast_fp16))[name = tensor<string, []>("op_710_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_35_cast_fp16 = concat(axis = var_591, interleave = input_35_interleave_0, values = (var_696_cast_fp16, var_698_cast_fp16, var_700_cast_fp16, var_702_cast_fp16, var_704_cast_fp16, var_706_cast_fp16, var_708_cast_fp16, var_710_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_719_pad_type_0 = const()[name = tensor<string, []>("op_719_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_719_strides_0 = const()[name = tensor<string, []>("op_719_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_719_pad_0 = const()[name = tensor<string, []>("op_719_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_719_dilations_0 = const()[name = tensor<string, []>("op_719_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_719_groups_0 = const()[name = tensor<string, []>("op_719_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23848576)))];
+            tensor<fp16, [512]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24372928)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_719_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_719_dilations_0, groups = var_719_groups_0, pad = var_719_pad_0, pad_type = var_719_pad_type_0, strides = var_719_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_719_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_719_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24374016)))];
+            tensor<fp16, [512]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24375104)))];
+            tensor<fp16, []> var_729_to_fp16 = const()[name = tensor<string, []>("op_729_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_729_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24376192)))];
+            tensor<fp16, [2048]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26473408)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_755_pad_type_0 = const()[name = tensor<string, []>("op_755_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_755_strides_0 = const()[name = tensor<string, []>("op_755_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_755_pad_0 = const()[name = tensor<string, []>("op_755_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_755_dilations_0 = const()[name = tensor<string, []>("op_755_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_755_groups_0 = const()[name = tensor<string, []>("op_755_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26477568)))];
+            tensor<fp16, [512]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28574784)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_755_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_755_dilations_0, groups = var_755_groups_0, pad = var_755_pad_0, pad_type = var_755_pad_type_0, strides = var_755_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_755_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_755_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_764 = const()[name = tensor<string, []>("op_764"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28575872)))];
+            tensor<fp16, [512]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28576960)))];
+            tensor<fp16, []> var_780_to_fp16 = const()[name = tensor<string, []>("op_780_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_780_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_815_weight_0_to_fp16 = const()[name = tensor<string, []>("op_815_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28578048)))];
+            tensor<fp16, [512]> var_815_bias_0_to_fp16 = const()[name = tensor<string, []>("op_815_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29102400)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_815_cast_fp16 = conv(bias = var_815_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_815_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29103488)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_813_pad_type_0 = const()[name = tensor<string, []>("op_813_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_813_strides_0 = const()[name = tensor<string, []>("op_813_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_813_pad_0 = const()[name = tensor<string, []>("op_813_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_813_dilations_0 = const()[name = tensor<string, []>("op_813_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_813_groups_0 = const()[name = tensor<string, []>("op_813_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29627840)))];
+            tensor<fp16, [512]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30152192)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_813_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_813_dilations_0, groups = var_813_groups_0, pad = var_813_pad_0, pad_type = var_813_pad_type_0, strides = var_813_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_813_cast_fp16")];
+            tensor<int32, [8]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_816_axis_0 = const()[name = tensor<string, []>("op_816_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_816_cast_fp16_7 = split(axis = var_816_axis_0, split_sizes = tile_12, x = var_815_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<int32, [4]> var_825_perm_0 = const()[name = tensor<string, []>("op_825_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_826_axis_0 = const()[name = tensor<string, []>("op_826_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_825_cast_fp16 = transpose(perm = var_825_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_826_cast_fp16_7 = split(axis = var_826_axis_0, split_sizes = tile_13, x = var_825_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<int32, [8]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_835_axis_0 = const()[name = tensor<string, []>("op_835_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_835_cast_fp16_7 = split(axis = var_835_axis_0, split_sizes = tile_14, x = var_813_cast_fp16)[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_826_cast_fp16_0, var_816_cast_fp16_0))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_826_cast_fp16_1, var_816_cast_fp16_1))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_826_cast_fp16_2, var_816_cast_fp16_2))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_826_cast_fp16_3, var_816_cast_fp16_3))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_826_cast_fp16_4, var_816_cast_fp16_4))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_826_cast_fp16_5, var_816_cast_fp16_5))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_826_cast_fp16_6, var_816_cast_fp16_6))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_826_cast_fp16_7, var_816_cast_fp16_7))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_860_cast_fp16 = softmax(axis = var_764, x = aw_65_cast_fp16)[name = tensor<string, []>("op_860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_861_cast_fp16 = softmax(axis = var_764, x = aw_67_cast_fp16)[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_862_cast_fp16 = softmax(axis = var_764, x = aw_69_cast_fp16)[name = tensor<string, []>("op_862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_863_cast_fp16 = softmax(axis = var_764, x = aw_71_cast_fp16)[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_864_cast_fp16 = softmax(axis = var_764, x = aw_73_cast_fp16)[name = tensor<string, []>("op_864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_865_cast_fp16 = softmax(axis = var_764, x = aw_75_cast_fp16)[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_866_cast_fp16 = softmax(axis = var_764, x = aw_77_cast_fp16)[name = tensor<string, []>("op_866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_867_cast_fp16 = softmax(axis = var_764, x = aw_79_cast_fp16)[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<string, []> var_869_equation_0 = const()[name = tensor<string, []>("op_869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_869_cast_fp16 = einsum(equation = var_869_equation_0, values = (var_835_cast_fp16_0, var_860_cast_fp16))[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_835_cast_fp16_1, var_861_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<string, []> var_873_equation_0 = const()[name = tensor<string, []>("op_873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_873_cast_fp16 = einsum(equation = var_873_equation_0, values = (var_835_cast_fp16_2, var_862_cast_fp16))[name = tensor<string, []>("op_873_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_835_cast_fp16_3, var_863_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<string, []> var_877_equation_0 = const()[name = tensor<string, []>("op_877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_877_cast_fp16 = einsum(equation = var_877_equation_0, values = (var_835_cast_fp16_4, var_864_cast_fp16))[name = tensor<string, []>("op_877_cast_fp16")];
+            tensor<string, []> var_879_equation_0 = const()[name = tensor<string, []>("op_879_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_879_cast_fp16 = einsum(equation = var_879_equation_0, values = (var_835_cast_fp16_5, var_865_cast_fp16))[name = tensor<string, []>("op_879_cast_fp16")];
+            tensor<string, []> var_881_equation_0 = const()[name = tensor<string, []>("op_881_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_881_cast_fp16 = einsum(equation = var_881_equation_0, values = (var_835_cast_fp16_6, var_866_cast_fp16))[name = tensor<string, []>("op_881_cast_fp16")];
+            tensor<string, []> var_883_equation_0 = const()[name = tensor<string, []>("op_883_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_883_cast_fp16 = einsum(equation = var_883_equation_0, values = (var_835_cast_fp16_7, var_867_cast_fp16))[name = tensor<string, []>("op_883_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_45_cast_fp16 = concat(axis = var_764, interleave = input_45_interleave_0, values = (var_869_cast_fp16, var_871_cast_fp16, var_873_cast_fp16, var_875_cast_fp16, var_877_cast_fp16, var_879_cast_fp16, var_881_cast_fp16, var_883_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_892_pad_type_0 = const()[name = tensor<string, []>("op_892_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_892_strides_0 = const()[name = tensor<string, []>("op_892_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_892_pad_0 = const()[name = tensor<string, []>("op_892_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_892_dilations_0 = const()[name = tensor<string, []>("op_892_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_892_groups_0 = const()[name = tensor<string, []>("op_892_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30153280)))];
+            tensor<fp16, [512]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30677632)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_892_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_892_dilations_0, groups = var_892_groups_0, pad = var_892_pad_0, pad_type = var_892_pad_type_0, strides = var_892_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_892_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_892_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30678720)))];
+            tensor<fp16, [512]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30679808)))];
+            tensor<fp16, []> var_902_to_fp16 = const()[name = tensor<string, []>("op_902_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_902_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30680896)))];
+            tensor<fp16, [2048]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32778112)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_928_pad_type_0 = const()[name = tensor<string, []>("op_928_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_928_strides_0 = const()[name = tensor<string, []>("op_928_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_928_pad_0 = const()[name = tensor<string, []>("op_928_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_928_dilations_0 = const()[name = tensor<string, []>("op_928_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_928_groups_0 = const()[name = tensor<string, []>("op_928_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32782272)))];
+            tensor<fp16, [512]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34879488)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_928_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_928_dilations_0, groups = var_928_groups_0, pad = var_928_pad_0, pad_type = var_928_pad_type_0, strides = var_928_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_928_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_928_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_937 = const()[name = tensor<string, []>("op_937"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34880576)))];
+            tensor<fp16, [512]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34881664)))];
+            tensor<fp16, []> var_953_to_fp16 = const()[name = tensor<string, []>("op_953_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_953_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> var_988_weight_0_to_fp16 = const()[name = tensor<string, []>("op_988_weight_0_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34882752)))];
+            tensor<fp16, [512]> var_988_bias_0_to_fp16 = const()[name = tensor<string, []>("op_988_bias_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35407104)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_988_cast_fp16 = conv(bias = var_988_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_988_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35408192)))];
+            tensor<fp16, [1, 512, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_986_pad_type_0 = const()[name = tensor<string, []>("op_986_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_986_strides_0 = const()[name = tensor<string, []>("op_986_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_986_pad_0 = const()[name = tensor<string, []>("op_986_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_986_dilations_0 = const()[name = tensor<string, []>("op_986_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_986_groups_0 = const()[name = tensor<string, []>("op_986_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35932544)))];
+            tensor<fp16, [512]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36456896)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_986_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_986_dilations_0, groups = var_986_groups_0, pad = var_986_pad_0, pad_type = var_986_pad_type_0, strides = var_986_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<int32, [8]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_989_axis_0 = const()[name = tensor<string, []>("op_989_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_989_cast_fp16_7 = split(axis = var_989_axis_0, split_sizes = tile_15, x = var_988_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
+            tensor<int32, [4]> var_998_perm_0 = const()[name = tensor<string, []>("op_998_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [8]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_999_axis_0 = const()[name = tensor<string, []>("op_999_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 512]> var_998_cast_fp16 = transpose(perm = var_998_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_999_cast_fp16_7 = split(axis = var_999_axis_0, split_sizes = tile_16, x = var_998_cast_fp16)[name = tensor<string, []>("op_999_cast_fp16")];
+            tensor<int32, [8]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [8]>([64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1008_axis_0 = const()[name = tensor<string, []>("op_1008_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1008_cast_fp16_7 = split(axis = var_1008_axis_0, split_sizes = tile_17, x = var_986_cast_fp16)[name = tensor<string, []>("op_1008_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_999_cast_fp16_0, var_989_cast_fp16_0))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_999_cast_fp16_1, var_989_cast_fp16_1))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_999_cast_fp16_2, var_989_cast_fp16_2))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_999_cast_fp16_3, var_989_cast_fp16_3))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_999_cast_fp16_4, var_989_cast_fp16_4))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_999_cast_fp16_5, var_989_cast_fp16_5))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_999_cast_fp16_6, var_989_cast_fp16_6))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_999_cast_fp16_7, var_989_cast_fp16_7))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1033_cast_fp16 = softmax(axis = var_937, x = aw_81_cast_fp16)[name = tensor<string, []>("op_1033_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1034_cast_fp16 = softmax(axis = var_937, x = aw_83_cast_fp16)[name = tensor<string, []>("op_1034_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1035_cast_fp16 = softmax(axis = var_937, x = aw_85_cast_fp16)[name = tensor<string, []>("op_1035_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1036_cast_fp16 = softmax(axis = var_937, x = aw_87_cast_fp16)[name = tensor<string, []>("op_1036_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1037_cast_fp16 = softmax(axis = var_937, x = aw_89_cast_fp16)[name = tensor<string, []>("op_1037_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1038_cast_fp16 = softmax(axis = var_937, x = aw_91_cast_fp16)[name = tensor<string, []>("op_1038_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1039_cast_fp16 = softmax(axis = var_937, x = aw_93_cast_fp16)[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1040_cast_fp16 = softmax(axis = var_937, x = aw_cast_fp16)[name = tensor<string, []>("op_1040_cast_fp16")];
+            tensor<string, []> var_1042_equation_0 = const()[name = tensor<string, []>("op_1042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1042_cast_fp16 = einsum(equation = var_1042_equation_0, values = (var_1008_cast_fp16_0, var_1033_cast_fp16))[name = tensor<string, []>("op_1042_cast_fp16")];
+            tensor<string, []> var_1044_equation_0 = const()[name = tensor<string, []>("op_1044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1044_cast_fp16 = einsum(equation = var_1044_equation_0, values = (var_1008_cast_fp16_1, var_1034_cast_fp16))[name = tensor<string, []>("op_1044_cast_fp16")];
+            tensor<string, []> var_1046_equation_0 = const()[name = tensor<string, []>("op_1046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1046_cast_fp16 = einsum(equation = var_1046_equation_0, values = (var_1008_cast_fp16_2, var_1035_cast_fp16))[name = tensor<string, []>("op_1046_cast_fp16")];
+            tensor<string, []> var_1048_equation_0 = const()[name = tensor<string, []>("op_1048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1048_cast_fp16 = einsum(equation = var_1048_equation_0, values = (var_1008_cast_fp16_3, var_1036_cast_fp16))[name = tensor<string, []>("op_1048_cast_fp16")];
+            tensor<string, []> var_1050_equation_0 = const()[name = tensor<string, []>("op_1050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1050_cast_fp16 = einsum(equation = var_1050_equation_0, values = (var_1008_cast_fp16_4, var_1037_cast_fp16))[name = tensor<string, []>("op_1050_cast_fp16")];
+            tensor<string, []> var_1052_equation_0 = const()[name = tensor<string, []>("op_1052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1052_cast_fp16 = einsum(equation = var_1052_equation_0, values = (var_1008_cast_fp16_5, var_1038_cast_fp16))[name = tensor<string, []>("op_1052_cast_fp16")];
+            tensor<string, []> var_1054_equation_0 = const()[name = tensor<string, []>("op_1054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1054_cast_fp16 = einsum(equation = var_1054_equation_0, values = (var_1008_cast_fp16_6, var_1039_cast_fp16))[name = tensor<string, []>("op_1054_cast_fp16")];
+            tensor<string, []> var_1056_equation_0 = const()[name = tensor<string, []>("op_1056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1056_cast_fp16 = einsum(equation = var_1056_equation_0, values = (var_1008_cast_fp16_7, var_1040_cast_fp16))[name = tensor<string, []>("op_1056_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 512, 1, 1500]> input_55_cast_fp16 = concat(axis = var_937, interleave = input_55_interleave_0, values = (var_1042_cast_fp16, var_1044_cast_fp16, var_1046_cast_fp16, var_1048_cast_fp16, var_1050_cast_fp16, var_1052_cast_fp16, var_1054_cast_fp16, var_1056_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1065_pad_type_0 = const()[name = tensor<string, []>("op_1065_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1065_strides_0 = const()[name = tensor<string, []>("op_1065_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1065_pad_0 = const()[name = tensor<string, []>("op_1065_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1065_dilations_0 = const()[name = tensor<string, []>("op_1065_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1065_groups_0 = const()[name = tensor<string, []>("op_1065_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 512, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [512, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36457984)))];
+            tensor<fp16, [512]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36982336)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_1065_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1065_dilations_0, groups = var_1065_groups_0, pad = var_1065_pad_0, pad_type = var_1065_pad_type_0, strides = var_1065_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1065_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1065_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36983424)))];
+            tensor<fp16, [512]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36984512)))];
+            tensor<fp16, []> var_1075_to_fp16 = const()[name = tensor<string, []>("op_1075_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1075_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [2048, 512, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [2048, 512, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36985600)))];
+            tensor<fp16, [2048]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39082816)))];
+            tensor<fp16, [1, 2048, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 2048, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_1101_pad_type_0 = const()[name = tensor<string, []>("op_1101_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1101_strides_0 = const()[name = tensor<string, []>("op_1101_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1101_pad_0 = const()[name = tensor<string, []>("op_1101_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1101_dilations_0 = const()[name = tensor<string, []>("op_1101_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1101_groups_0 = const()[name = tensor<string, []>("op_1101_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [512, 2048, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [512, 2048, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39086976)))];
+            tensor<fp16, [512]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41184192)))];
+            tensor<fp16, [1, 512, 1, 1500]> var_1101_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1101_dilations_0, groups = var_1101_groups_0, pad = var_1101_pad_0, pad_type = var_1101_pad_type_0, strides = var_1101_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 1500]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1101_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [512]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41185280)))];
+            tensor<fp16, [512]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41186368)))];
+            tensor<fp16, []> var_1115_to_fp16 = const()[name = tensor<string, []>("op_1115_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 512, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_1115_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_1126_axes_0 = const()[name = tensor<string, []>("op_1126_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 512, 1500]> var_1126_cast_fp16 = squeeze(axes = var_1126_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<int32, [3]> var_1129_perm_0 = const()[name = tensor<string, []>("op_1129_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_1129_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_1129_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 512]> var_1129_cast_fp16 = transpose(perm = var_1129_perm_0, x = var_1126_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 512]> output = cast(dtype = var_1129_cast_fp16_to_fp32_dtype_0, x = var_1129_cast_fp16)[name = tensor<string, []>("cast_27")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/base/ggml-base-encoder.mlmodelc/weights/weight.bin b/base/ggml-base-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cb3d24c42f71f1a8da10dec6dd90d8268c837658
--- /dev/null
+++ b/base/ggml-base-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7f7b1747c1873721448575a03baf7cf56b0bc72f6e04df7d1f9ce305688b9a2
+size 41187456
diff --git a/base/ggml-base.bin b/base/ggml-base.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17993a254ac4e9db7642a261cf53af3dc446145c
--- /dev/null
+++ b/base/ggml-base.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe
+size 147951465
diff --git a/large-v1/.DS_Store b/large-v1/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..0585c5ab640da137e4708c84a4b7e0d63c6360af
Binary files /dev/null and b/large-v1/.DS_Store differ
diff --git a/large-v1/ggml-large-v1-encoder.mlmodelc/analytics/coremldata.bin b/large-v1/ggml-large-v1-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6dc984a46d0a814f3f51e1b9a397639a2cae2e9e
--- /dev/null
+++ b/large-v1/ggml-large-v1-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab6e6c98293af2c12bf584cdda19a464709cef895b408c568445cd111d206b9
+size 243
diff --git a/large-v1/ggml-large-v1-encoder.mlmodelc/coremldata.bin b/large-v1/ggml-large-v1-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d3539e433ce80b83110a6d06e68290711f92c478
--- /dev/null
+++ b/large-v1/ggml-large-v1-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc95fdd4e4c14ec1ad848ab88c742c79010ffa3969276143703ffed87fc8763
+size 320
diff --git a/large-v1/ggml-large-v1-encoder.mlmodelc/metadata.json b/large-v1/ggml-large-v1-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..a01304a9126b85f6f579e0334bb2dce26cd657d9
--- /dev/null
+++ b/large-v1/ggml-large-v1-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 32,
+      "Gelu" : 34,
+      "LayerNorm" : 65,
+      "Transpose" : 33,
+      "Softmax" : 640,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 65,
+      "Einsum" : 1280,
+      "ExpandDims" : 1,
+      "Split" : 96,
+      "Conv" : 194
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.2",
+      "com.github.apple.coremltools.version" : "8.3.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_large_v1",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v1/ggml-large-v1-encoder.mlmodelc/model.mil b/large-v1/ggml-large-v1-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..8ab8cd9e6674b7e4122ebb511e9de5d9978c2bb6
--- /dev/null
+++ b/large-v1/ggml-large-v1-encoder.mlmodelc/model.mil
@@ -0,0 +1,5643 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_84_pad_type_0 = const()[name = tensor<string, []>("op_84_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_84_pad_0 = const()[name = tensor<string, []>("op_84_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_84_strides_0 = const()[name = tensor<string, []>("op_84_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_84_dilations_0 = const()[name = tensor<string, []>("op_84_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_84_groups_0 = const()[name = tensor<string, []>("op_84_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1280, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [1280, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1280]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614528)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_132")];
+            tensor<fp16, [1, 1280, 3000]> var_84_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_84_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_102_pad_type_0 = const()[name = tensor<string, []>("op_102_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_102_pad_0 = const()[name = tensor<string, []>("op_102_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_102_strides_0 = const()[name = tensor<string, []>("op_102_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_102_dilations_0 = const()[name = tensor<string, []>("op_102_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_102_groups_0 = const()[name = tensor<string, []>("op_102_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1280, 1280, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617152)))];
+            tensor<fp16, [1280]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10447616)))];
+            tensor<fp16, [1, 1280, 1500]> var_102_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_102_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [1280, 1500]> var_107_to_fp16 = const()[name = tensor<string, []>("op_107_to_fp16"), val = tensor<fp16, [1280, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10450240)))];
+            tensor<fp16, [1, 1280, 1500]> var_109_cast_fp16 = add(x = x_3_cast_fp16, y = var_107_to_fp16)[name = tensor<string, []>("op_109_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_109_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14290304)))];
+            tensor<fp16, [1280]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14292928)))];
+            tensor<fp16, []> var_140_to_fp16 = const()[name = tensor<string, []>("op_140_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_140_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_175_weight_0_to_fp16 = const()[name = tensor<string, []>("op_175_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14295552)))];
+            tensor<fp16, [1280]> var_175_bias_0_to_fp16 = const()[name = tensor<string, []>("op_175_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17572416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_175_cast_fp16 = conv(bias = var_175_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_175_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17575040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_173_pad_type_0 = const()[name = tensor<string, []>("op_173_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_173_strides_0 = const()[name = tensor<string, []>("op_173_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_173_pad_0 = const()[name = tensor<string, []>("op_173_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_173_dilations_0 = const()[name = tensor<string, []>("op_173_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_173_groups_0 = const()[name = tensor<string, []>("op_173_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20851904)))];
+            tensor<fp16, [1280]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24128768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_173_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_173_dilations_0, groups = var_173_groups_0, pad = var_173_pad_0, pad_type = var_173_pad_type_0, strides = var_173_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_173_cast_fp16")];
+            tensor<int32, [20]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_176_axis_0 = const()[name = tensor<string, []>("op_176_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_19 = split(axis = var_176_axis_0, split_sizes = tile_0, x = var_175_cast_fp16)[name = tensor<string, []>("op_176_cast_fp16")];
+            tensor<int32, [4]> var_197_perm_0 = const()[name = tensor<string, []>("op_197_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_198_axis_0 = const()[name = tensor<string, []>("op_198_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_197_cast_fp16 = transpose(perm = var_197_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_32")];
+            tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_19 = split(axis = var_198_axis_0, split_sizes = tile_1, x = var_197_cast_fp16)[name = tensor<string, []>("op_198_cast_fp16")];
+            tensor<int32, [20]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_219_axis_0 = const()[name = tensor<string, []>("op_219_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_19 = split(axis = var_219_axis_0, split_sizes = tile_2, x = var_173_cast_fp16)[name = tensor<string, []>("op_219_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_198_cast_fp16_0, var_176_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_198_cast_fp16_1, var_176_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_198_cast_fp16_2, var_176_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_198_cast_fp16_3, var_176_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_198_cast_fp16_4, var_176_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_198_cast_fp16_5, var_176_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_198_cast_fp16_6, var_176_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_198_cast_fp16_7, var_176_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_198_cast_fp16_8, var_176_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_198_cast_fp16_9, var_176_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_198_cast_fp16_10, var_176_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_198_cast_fp16_11, var_176_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_198_cast_fp16_12, var_176_cast_fp16_12))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_198_cast_fp16_13, var_176_cast_fp16_13))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_198_cast_fp16_14, var_176_cast_fp16_14))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_198_cast_fp16_15, var_176_cast_fp16_15))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_198_cast_fp16_16, var_176_cast_fp16_16))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_198_cast_fp16_17, var_176_cast_fp16_17))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_198_cast_fp16_18, var_176_cast_fp16_18))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_198_cast_fp16_19, var_176_cast_fp16_19))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_280_cast_fp16 = softmax(axis = var_124, x = aw_1_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_281_cast_fp16 = softmax(axis = var_124, x = aw_3_cast_fp16)[name = tensor<string, []>("op_281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_282_cast_fp16 = softmax(axis = var_124, x = aw_5_cast_fp16)[name = tensor<string, []>("op_282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_283_cast_fp16 = softmax(axis = var_124, x = aw_7_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_284_cast_fp16 = softmax(axis = var_124, x = aw_9_cast_fp16)[name = tensor<string, []>("op_284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_285_cast_fp16 = softmax(axis = var_124, x = aw_11_cast_fp16)[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_286_cast_fp16 = softmax(axis = var_124, x = aw_13_cast_fp16)[name = tensor<string, []>("op_286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_287_cast_fp16 = softmax(axis = var_124, x = aw_15_cast_fp16)[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_288_cast_fp16 = softmax(axis = var_124, x = aw_17_cast_fp16)[name = tensor<string, []>("op_288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_289_cast_fp16 = softmax(axis = var_124, x = aw_19_cast_fp16)[name = tensor<string, []>("op_289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_290_cast_fp16 = softmax(axis = var_124, x = aw_21_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_291_cast_fp16 = softmax(axis = var_124, x = aw_23_cast_fp16)[name = tensor<string, []>("op_291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_292_cast_fp16 = softmax(axis = var_124, x = aw_25_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_293_cast_fp16 = softmax(axis = var_124, x = aw_27_cast_fp16)[name = tensor<string, []>("op_293_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_294_cast_fp16 = softmax(axis = var_124, x = aw_29_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_295_cast_fp16 = softmax(axis = var_124, x = aw_31_cast_fp16)[name = tensor<string, []>("op_295_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_296_cast_fp16 = softmax(axis = var_124, x = aw_33_cast_fp16)[name = tensor<string, []>("op_296_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_297_cast_fp16 = softmax(axis = var_124, x = aw_35_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_298_cast_fp16 = softmax(axis = var_124, x = aw_37_cast_fp16)[name = tensor<string, []>("op_298_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_299_cast_fp16 = softmax(axis = var_124, x = aw_39_cast_fp16)[name = tensor<string, []>("op_299_cast_fp16")];
+            tensor<string, []> var_301_equation_0 = const()[name = tensor<string, []>("op_301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_301_cast_fp16 = einsum(equation = var_301_equation_0, values = (var_219_cast_fp16_0, var_280_cast_fp16))[name = tensor<string, []>("op_301_cast_fp16")];
+            tensor<string, []> var_303_equation_0 = const()[name = tensor<string, []>("op_303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_303_cast_fp16 = einsum(equation = var_303_equation_0, values = (var_219_cast_fp16_1, var_281_cast_fp16))[name = tensor<string, []>("op_303_cast_fp16")];
+            tensor<string, []> var_305_equation_0 = const()[name = tensor<string, []>("op_305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_305_cast_fp16 = einsum(equation = var_305_equation_0, values = (var_219_cast_fp16_2, var_282_cast_fp16))[name = tensor<string, []>("op_305_cast_fp16")];
+            tensor<string, []> var_307_equation_0 = const()[name = tensor<string, []>("op_307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_307_cast_fp16 = einsum(equation = var_307_equation_0, values = (var_219_cast_fp16_3, var_283_cast_fp16))[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<string, []> var_309_equation_0 = const()[name = tensor<string, []>("op_309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_309_cast_fp16 = einsum(equation = var_309_equation_0, values = (var_219_cast_fp16_4, var_284_cast_fp16))[name = tensor<string, []>("op_309_cast_fp16")];
+            tensor<string, []> var_311_equation_0 = const()[name = tensor<string, []>("op_311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_311_cast_fp16 = einsum(equation = var_311_equation_0, values = (var_219_cast_fp16_5, var_285_cast_fp16))[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<string, []> var_313_equation_0 = const()[name = tensor<string, []>("op_313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_313_cast_fp16 = einsum(equation = var_313_equation_0, values = (var_219_cast_fp16_6, var_286_cast_fp16))[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<string, []> var_315_equation_0 = const()[name = tensor<string, []>("op_315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_315_cast_fp16 = einsum(equation = var_315_equation_0, values = (var_219_cast_fp16_7, var_287_cast_fp16))[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<string, []> var_317_equation_0 = const()[name = tensor<string, []>("op_317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_317_cast_fp16 = einsum(equation = var_317_equation_0, values = (var_219_cast_fp16_8, var_288_cast_fp16))[name = tensor<string, []>("op_317_cast_fp16")];
+            tensor<string, []> var_319_equation_0 = const()[name = tensor<string, []>("op_319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_319_cast_fp16 = einsum(equation = var_319_equation_0, values = (var_219_cast_fp16_9, var_289_cast_fp16))[name = tensor<string, []>("op_319_cast_fp16")];
+            tensor<string, []> var_321_equation_0 = const()[name = tensor<string, []>("op_321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_321_cast_fp16 = einsum(equation = var_321_equation_0, values = (var_219_cast_fp16_10, var_290_cast_fp16))[name = tensor<string, []>("op_321_cast_fp16")];
+            tensor<string, []> var_323_equation_0 = const()[name = tensor<string, []>("op_323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_323_cast_fp16 = einsum(equation = var_323_equation_0, values = (var_219_cast_fp16_11, var_291_cast_fp16))[name = tensor<string, []>("op_323_cast_fp16")];
+            tensor<string, []> var_325_equation_0 = const()[name = tensor<string, []>("op_325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_325_cast_fp16 = einsum(equation = var_325_equation_0, values = (var_219_cast_fp16_12, var_292_cast_fp16))[name = tensor<string, []>("op_325_cast_fp16")];
+            tensor<string, []> var_327_equation_0 = const()[name = tensor<string, []>("op_327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_327_cast_fp16 = einsum(equation = var_327_equation_0, values = (var_219_cast_fp16_13, var_293_cast_fp16))[name = tensor<string, []>("op_327_cast_fp16")];
+            tensor<string, []> var_329_equation_0 = const()[name = tensor<string, []>("op_329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_329_cast_fp16 = einsum(equation = var_329_equation_0, values = (var_219_cast_fp16_14, var_294_cast_fp16))[name = tensor<string, []>("op_329_cast_fp16")];
+            tensor<string, []> var_331_equation_0 = const()[name = tensor<string, []>("op_331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_331_cast_fp16 = einsum(equation = var_331_equation_0, values = (var_219_cast_fp16_15, var_295_cast_fp16))[name = tensor<string, []>("op_331_cast_fp16")];
+            tensor<string, []> var_333_equation_0 = const()[name = tensor<string, []>("op_333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_333_cast_fp16 = einsum(equation = var_333_equation_0, values = (var_219_cast_fp16_16, var_296_cast_fp16))[name = tensor<string, []>("op_333_cast_fp16")];
+            tensor<string, []> var_335_equation_0 = const()[name = tensor<string, []>("op_335_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_335_cast_fp16 = einsum(equation = var_335_equation_0, values = (var_219_cast_fp16_17, var_297_cast_fp16))[name = tensor<string, []>("op_335_cast_fp16")];
+            tensor<string, []> var_337_equation_0 = const()[name = tensor<string, []>("op_337_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_337_cast_fp16 = einsum(equation = var_337_equation_0, values = (var_219_cast_fp16_18, var_298_cast_fp16))[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<string, []> var_339_equation_0 = const()[name = tensor<string, []>("op_339_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_339_cast_fp16 = einsum(equation = var_339_equation_0, values = (var_219_cast_fp16_19, var_299_cast_fp16))[name = tensor<string, []>("op_339_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_5_cast_fp16 = concat(axis = var_124, interleave = input_5_interleave_0, values = (var_301_cast_fp16, var_303_cast_fp16, var_305_cast_fp16, var_307_cast_fp16, var_309_cast_fp16, var_311_cast_fp16, var_313_cast_fp16, var_315_cast_fp16, var_317_cast_fp16, var_319_cast_fp16, var_321_cast_fp16, var_323_cast_fp16, var_325_cast_fp16, var_327_cast_fp16, var_329_cast_fp16, var_331_cast_fp16, var_333_cast_fp16, var_335_cast_fp16, var_337_cast_fp16, var_339_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_348_pad_type_0 = const()[name = tensor<string, []>("op_348_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_348_strides_0 = const()[name = tensor<string, []>("op_348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_348_pad_0 = const()[name = tensor<string, []>("op_348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_348_dilations_0 = const()[name = tensor<string, []>("op_348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_348_groups_0 = const()[name = tensor<string, []>("op_348_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24131392)))];
+            tensor<fp16, [1280]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27408256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_348_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27410880)))];
+            tensor<fp16, [1280]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27413504)))];
+            tensor<fp16, []> var_358_to_fp16 = const()[name = tensor<string, []>("op_358_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_358_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27416128)))];
+            tensor<fp16, [5120]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40523392)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_384_pad_type_0 = const()[name = tensor<string, []>("op_384_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_384_strides_0 = const()[name = tensor<string, []>("op_384_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_384_pad_0 = const()[name = tensor<string, []>("op_384_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_384_dilations_0 = const()[name = tensor<string, []>("op_384_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_384_groups_0 = const()[name = tensor<string, []>("op_384_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40533696)))];
+            tensor<fp16, [1280]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53640960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_384_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_384_dilations_0, groups = var_384_groups_0, pad = var_384_pad_0, pad_type = var_384_pad_type_0, strides = var_384_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_384_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_384_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_393 = const()[name = tensor<string, []>("op_393"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53643584)))];
+            tensor<fp16, [1280]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53646208)))];
+            tensor<fp16, []> var_409_to_fp16 = const()[name = tensor<string, []>("op_409_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_409_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_444_weight_0_to_fp16 = const()[name = tensor<string, []>("op_444_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53648832)))];
+            tensor<fp16, [1280]> var_444_bias_0_to_fp16 = const()[name = tensor<string, []>("op_444_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56925696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_444_cast_fp16 = conv(bias = var_444_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_444_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_444_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56928320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_442_pad_type_0 = const()[name = tensor<string, []>("op_442_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_442_strides_0 = const()[name = tensor<string, []>("op_442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_442_pad_0 = const()[name = tensor<string, []>("op_442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_442_dilations_0 = const()[name = tensor<string, []>("op_442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_442_groups_0 = const()[name = tensor<string, []>("op_442_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60205184)))];
+            tensor<fp16, [1280]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63482048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_442_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
+            tensor<int32, [20]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_445_axis_0 = const()[name = tensor<string, []>("op_445_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_19 = split(axis = var_445_axis_0, split_sizes = tile_3, x = var_444_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
+            tensor<int32, [4]> var_466_perm_0 = const()[name = tensor<string, []>("op_466_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_467_axis_0 = const()[name = tensor<string, []>("op_467_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_466_cast_fp16 = transpose(perm = var_466_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_31")];
+            tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_19 = split(axis = var_467_axis_0, split_sizes = tile_4, x = var_466_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
+            tensor<int32, [20]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_488_axis_0 = const()[name = tensor<string, []>("op_488_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_19 = split(axis = var_488_axis_0, split_sizes = tile_5, x = var_442_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_467_cast_fp16_0, var_445_cast_fp16_0))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_467_cast_fp16_1, var_445_cast_fp16_1))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_467_cast_fp16_2, var_445_cast_fp16_2))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_467_cast_fp16_3, var_445_cast_fp16_3))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_467_cast_fp16_4, var_445_cast_fp16_4))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_467_cast_fp16_5, var_445_cast_fp16_5))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_467_cast_fp16_6, var_445_cast_fp16_6))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_467_cast_fp16_7, var_445_cast_fp16_7))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_467_cast_fp16_8, var_445_cast_fp16_8))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_467_cast_fp16_9, var_445_cast_fp16_9))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_467_cast_fp16_10, var_445_cast_fp16_10))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_467_cast_fp16_11, var_445_cast_fp16_11))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_467_cast_fp16_12, var_445_cast_fp16_12))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_467_cast_fp16_13, var_445_cast_fp16_13))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_467_cast_fp16_14, var_445_cast_fp16_14))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_467_cast_fp16_15, var_445_cast_fp16_15))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_467_cast_fp16_16, var_445_cast_fp16_16))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_467_cast_fp16_17, var_445_cast_fp16_17))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_467_cast_fp16_18, var_445_cast_fp16_18))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_467_cast_fp16_19, var_445_cast_fp16_19))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_549_cast_fp16 = softmax(axis = var_393, x = aw_41_cast_fp16)[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_550_cast_fp16 = softmax(axis = var_393, x = aw_43_cast_fp16)[name = tensor<string, []>("op_550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_551_cast_fp16 = softmax(axis = var_393, x = aw_45_cast_fp16)[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_552_cast_fp16 = softmax(axis = var_393, x = aw_47_cast_fp16)[name = tensor<string, []>("op_552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_553_cast_fp16 = softmax(axis = var_393, x = aw_49_cast_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_554_cast_fp16 = softmax(axis = var_393, x = aw_51_cast_fp16)[name = tensor<string, []>("op_554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_555_cast_fp16 = softmax(axis = var_393, x = aw_53_cast_fp16)[name = tensor<string, []>("op_555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_556_cast_fp16 = softmax(axis = var_393, x = aw_55_cast_fp16)[name = tensor<string, []>("op_556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_557_cast_fp16 = softmax(axis = var_393, x = aw_57_cast_fp16)[name = tensor<string, []>("op_557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_558_cast_fp16 = softmax(axis = var_393, x = aw_59_cast_fp16)[name = tensor<string, []>("op_558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_559_cast_fp16 = softmax(axis = var_393, x = aw_61_cast_fp16)[name = tensor<string, []>("op_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_560_cast_fp16 = softmax(axis = var_393, x = aw_63_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_561_cast_fp16 = softmax(axis = var_393, x = aw_65_cast_fp16)[name = tensor<string, []>("op_561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_562_cast_fp16 = softmax(axis = var_393, x = aw_67_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_563_cast_fp16 = softmax(axis = var_393, x = aw_69_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_564_cast_fp16 = softmax(axis = var_393, x = aw_71_cast_fp16)[name = tensor<string, []>("op_564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_565_cast_fp16 = softmax(axis = var_393, x = aw_73_cast_fp16)[name = tensor<string, []>("op_565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_566_cast_fp16 = softmax(axis = var_393, x = aw_75_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_567_cast_fp16 = softmax(axis = var_393, x = aw_77_cast_fp16)[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_568_cast_fp16 = softmax(axis = var_393, x = aw_79_cast_fp16)[name = tensor<string, []>("op_568_cast_fp16")];
+            tensor<string, []> var_570_equation_0 = const()[name = tensor<string, []>("op_570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_570_cast_fp16 = einsum(equation = var_570_equation_0, values = (var_488_cast_fp16_0, var_549_cast_fp16))[name = tensor<string, []>("op_570_cast_fp16")];
+            tensor<string, []> var_572_equation_0 = const()[name = tensor<string, []>("op_572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_572_cast_fp16 = einsum(equation = var_572_equation_0, values = (var_488_cast_fp16_1, var_550_cast_fp16))[name = tensor<string, []>("op_572_cast_fp16")];
+            tensor<string, []> var_574_equation_0 = const()[name = tensor<string, []>("op_574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_574_cast_fp16 = einsum(equation = var_574_equation_0, values = (var_488_cast_fp16_2, var_551_cast_fp16))[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<string, []> var_576_equation_0 = const()[name = tensor<string, []>("op_576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_576_cast_fp16 = einsum(equation = var_576_equation_0, values = (var_488_cast_fp16_3, var_552_cast_fp16))[name = tensor<string, []>("op_576_cast_fp16")];
+            tensor<string, []> var_578_equation_0 = const()[name = tensor<string, []>("op_578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_578_cast_fp16 = einsum(equation = var_578_equation_0, values = (var_488_cast_fp16_4, var_553_cast_fp16))[name = tensor<string, []>("op_578_cast_fp16")];
+            tensor<string, []> var_580_equation_0 = const()[name = tensor<string, []>("op_580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_580_cast_fp16 = einsum(equation = var_580_equation_0, values = (var_488_cast_fp16_5, var_554_cast_fp16))[name = tensor<string, []>("op_580_cast_fp16")];
+            tensor<string, []> var_582_equation_0 = const()[name = tensor<string, []>("op_582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_582_cast_fp16 = einsum(equation = var_582_equation_0, values = (var_488_cast_fp16_6, var_555_cast_fp16))[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<string, []> var_584_equation_0 = const()[name = tensor<string, []>("op_584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_584_cast_fp16 = einsum(equation = var_584_equation_0, values = (var_488_cast_fp16_7, var_556_cast_fp16))[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<string, []> var_586_equation_0 = const()[name = tensor<string, []>("op_586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_586_cast_fp16 = einsum(equation = var_586_equation_0, values = (var_488_cast_fp16_8, var_557_cast_fp16))[name = tensor<string, []>("op_586_cast_fp16")];
+            tensor<string, []> var_588_equation_0 = const()[name = tensor<string, []>("op_588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_588_cast_fp16 = einsum(equation = var_588_equation_0, values = (var_488_cast_fp16_9, var_558_cast_fp16))[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<string, []> var_590_equation_0 = const()[name = tensor<string, []>("op_590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_590_cast_fp16 = einsum(equation = var_590_equation_0, values = (var_488_cast_fp16_10, var_559_cast_fp16))[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<string, []> var_592_equation_0 = const()[name = tensor<string, []>("op_592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_592_cast_fp16 = einsum(equation = var_592_equation_0, values = (var_488_cast_fp16_11, var_560_cast_fp16))[name = tensor<string, []>("op_592_cast_fp16")];
+            tensor<string, []> var_594_equation_0 = const()[name = tensor<string, []>("op_594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_488_cast_fp16_12, var_561_cast_fp16))[name = tensor<string, []>("op_594_cast_fp16")];
+            tensor<string, []> var_596_equation_0 = const()[name = tensor<string, []>("op_596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_596_cast_fp16 = einsum(equation = var_596_equation_0, values = (var_488_cast_fp16_13, var_562_cast_fp16))[name = tensor<string, []>("op_596_cast_fp16")];
+            tensor<string, []> var_598_equation_0 = const()[name = tensor<string, []>("op_598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_488_cast_fp16_14, var_563_cast_fp16))[name = tensor<string, []>("op_598_cast_fp16")];
+            tensor<string, []> var_600_equation_0 = const()[name = tensor<string, []>("op_600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_600_cast_fp16 = einsum(equation = var_600_equation_0, values = (var_488_cast_fp16_15, var_564_cast_fp16))[name = tensor<string, []>("op_600_cast_fp16")];
+            tensor<string, []> var_602_equation_0 = const()[name = tensor<string, []>("op_602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_488_cast_fp16_16, var_565_cast_fp16))[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<string, []> var_604_equation_0 = const()[name = tensor<string, []>("op_604_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_604_cast_fp16 = einsum(equation = var_604_equation_0, values = (var_488_cast_fp16_17, var_566_cast_fp16))[name = tensor<string, []>("op_604_cast_fp16")];
+            tensor<string, []> var_606_equation_0 = const()[name = tensor<string, []>("op_606_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_488_cast_fp16_18, var_567_cast_fp16))[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<string, []> var_608_equation_0 = const()[name = tensor<string, []>("op_608_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_608_cast_fp16 = einsum(equation = var_608_equation_0, values = (var_488_cast_fp16_19, var_568_cast_fp16))[name = tensor<string, []>("op_608_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_15_cast_fp16 = concat(axis = var_393, interleave = input_15_interleave_0, values = (var_570_cast_fp16, var_572_cast_fp16, var_574_cast_fp16, var_576_cast_fp16, var_578_cast_fp16, var_580_cast_fp16, var_582_cast_fp16, var_584_cast_fp16, var_586_cast_fp16, var_588_cast_fp16, var_590_cast_fp16, var_592_cast_fp16, var_594_cast_fp16, var_596_cast_fp16, var_598_cast_fp16, var_600_cast_fp16, var_602_cast_fp16, var_604_cast_fp16, var_606_cast_fp16, var_608_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_617_pad_type_0 = const()[name = tensor<string, []>("op_617_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_617_strides_0 = const()[name = tensor<string, []>("op_617_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_617_pad_0 = const()[name = tensor<string, []>("op_617_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_617_dilations_0 = const()[name = tensor<string, []>("op_617_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_617_groups_0 = const()[name = tensor<string, []>("op_617_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63484672)))];
+            tensor<fp16, [1280]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66761536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_617_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_617_dilations_0, groups = var_617_groups_0, pad = var_617_pad_0, pad_type = var_617_pad_type_0, strides = var_617_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_617_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66764160)))];
+            tensor<fp16, [1280]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66766784)))];
+            tensor<fp16, []> var_627_to_fp16 = const()[name = tensor<string, []>("op_627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_627_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66769408)))];
+            tensor<fp16, [5120]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79876672)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_653_pad_type_0 = const()[name = tensor<string, []>("op_653_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_653_strides_0 = const()[name = tensor<string, []>("op_653_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_653_pad_0 = const()[name = tensor<string, []>("op_653_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_653_dilations_0 = const()[name = tensor<string, []>("op_653_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_653_groups_0 = const()[name = tensor<string, []>("op_653_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79886976)))];
+            tensor<fp16, [1280]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92994240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_653_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_653_dilations_0, groups = var_653_groups_0, pad = var_653_pad_0, pad_type = var_653_pad_type_0, strides = var_653_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_653_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_662 = const()[name = tensor<string, []>("op_662"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92996864)))];
+            tensor<fp16, [1280]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92999488)))];
+            tensor<fp16, []> var_678_to_fp16 = const()[name = tensor<string, []>("op_678_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_678_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_713_weight_0_to_fp16 = const()[name = tensor<string, []>("op_713_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93002112)))];
+            tensor<fp16, [1280]> var_713_bias_0_to_fp16 = const()[name = tensor<string, []>("op_713_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96278976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_713_cast_fp16 = conv(bias = var_713_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_713_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96281600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_711_pad_type_0 = const()[name = tensor<string, []>("op_711_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_711_strides_0 = const()[name = tensor<string, []>("op_711_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_711_pad_0 = const()[name = tensor<string, []>("op_711_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_711_dilations_0 = const()[name = tensor<string, []>("op_711_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_711_groups_0 = const()[name = tensor<string, []>("op_711_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99558464)))];
+            tensor<fp16, [1280]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102835328)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_711_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_711_dilations_0, groups = var_711_groups_0, pad = var_711_pad_0, pad_type = var_711_pad_type_0, strides = var_711_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_711_cast_fp16")];
+            tensor<int32, [20]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_714_axis_0 = const()[name = tensor<string, []>("op_714_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_19 = split(axis = var_714_axis_0, split_sizes = tile_6, x = var_713_cast_fp16)[name = tensor<string, []>("op_714_cast_fp16")];
+            tensor<int32, [4]> var_735_perm_0 = const()[name = tensor<string, []>("op_735_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_736_axis_0 = const()[name = tensor<string, []>("op_736_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_735_cast_fp16 = transpose(perm = var_735_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_30")];
+            tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_19 = split(axis = var_736_axis_0, split_sizes = tile_7, x = var_735_cast_fp16)[name = tensor<string, []>("op_736_cast_fp16")];
+            tensor<int32, [20]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_757_axis_0 = const()[name = tensor<string, []>("op_757_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_19 = split(axis = var_757_axis_0, split_sizes = tile_8, x = var_711_cast_fp16)[name = tensor<string, []>("op_757_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_736_cast_fp16_0, var_714_cast_fp16_0))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_736_cast_fp16_1, var_714_cast_fp16_1))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_736_cast_fp16_2, var_714_cast_fp16_2))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_736_cast_fp16_3, var_714_cast_fp16_3))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_736_cast_fp16_4, var_714_cast_fp16_4))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_736_cast_fp16_5, var_714_cast_fp16_5))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_736_cast_fp16_6, var_714_cast_fp16_6))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_736_cast_fp16_7, var_714_cast_fp16_7))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_736_cast_fp16_8, var_714_cast_fp16_8))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_736_cast_fp16_9, var_714_cast_fp16_9))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_736_cast_fp16_10, var_714_cast_fp16_10))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_736_cast_fp16_11, var_714_cast_fp16_11))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_736_cast_fp16_12, var_714_cast_fp16_12))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_736_cast_fp16_13, var_714_cast_fp16_13))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_736_cast_fp16_14, var_714_cast_fp16_14))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_736_cast_fp16_15, var_714_cast_fp16_15))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_736_cast_fp16_16, var_714_cast_fp16_16))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_736_cast_fp16_17, var_714_cast_fp16_17))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_736_cast_fp16_18, var_714_cast_fp16_18))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_736_cast_fp16_19, var_714_cast_fp16_19))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_818_cast_fp16 = softmax(axis = var_662, x = aw_81_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_819_cast_fp16 = softmax(axis = var_662, x = aw_83_cast_fp16)[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_820_cast_fp16 = softmax(axis = var_662, x = aw_85_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_821_cast_fp16 = softmax(axis = var_662, x = aw_87_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_822_cast_fp16 = softmax(axis = var_662, x = aw_89_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_823_cast_fp16 = softmax(axis = var_662, x = aw_91_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_824_cast_fp16 = softmax(axis = var_662, x = aw_93_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_825_cast_fp16 = softmax(axis = var_662, x = aw_95_cast_fp16)[name = tensor<string, []>("op_825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_826_cast_fp16 = softmax(axis = var_662, x = aw_97_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_827_cast_fp16 = softmax(axis = var_662, x = aw_99_cast_fp16)[name = tensor<string, []>("op_827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_828_cast_fp16 = softmax(axis = var_662, x = aw_101_cast_fp16)[name = tensor<string, []>("op_828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_829_cast_fp16 = softmax(axis = var_662, x = aw_103_cast_fp16)[name = tensor<string, []>("op_829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_830_cast_fp16 = softmax(axis = var_662, x = aw_105_cast_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_831_cast_fp16 = softmax(axis = var_662, x = aw_107_cast_fp16)[name = tensor<string, []>("op_831_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_832_cast_fp16 = softmax(axis = var_662, x = aw_109_cast_fp16)[name = tensor<string, []>("op_832_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_833_cast_fp16 = softmax(axis = var_662, x = aw_111_cast_fp16)[name = tensor<string, []>("op_833_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_834_cast_fp16 = softmax(axis = var_662, x = aw_113_cast_fp16)[name = tensor<string, []>("op_834_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_835_cast_fp16 = softmax(axis = var_662, x = aw_115_cast_fp16)[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_836_cast_fp16 = softmax(axis = var_662, x = aw_117_cast_fp16)[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_837_cast_fp16 = softmax(axis = var_662, x = aw_119_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
+            tensor<string, []> var_839_equation_0 = const()[name = tensor<string, []>("op_839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_839_cast_fp16 = einsum(equation = var_839_equation_0, values = (var_757_cast_fp16_0, var_818_cast_fp16))[name = tensor<string, []>("op_839_cast_fp16")];
+            tensor<string, []> var_841_equation_0 = const()[name = tensor<string, []>("op_841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_841_cast_fp16 = einsum(equation = var_841_equation_0, values = (var_757_cast_fp16_1, var_819_cast_fp16))[name = tensor<string, []>("op_841_cast_fp16")];
+            tensor<string, []> var_843_equation_0 = const()[name = tensor<string, []>("op_843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_843_cast_fp16 = einsum(equation = var_843_equation_0, values = (var_757_cast_fp16_2, var_820_cast_fp16))[name = tensor<string, []>("op_843_cast_fp16")];
+            tensor<string, []> var_845_equation_0 = const()[name = tensor<string, []>("op_845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_845_cast_fp16 = einsum(equation = var_845_equation_0, values = (var_757_cast_fp16_3, var_821_cast_fp16))[name = tensor<string, []>("op_845_cast_fp16")];
+            tensor<string, []> var_847_equation_0 = const()[name = tensor<string, []>("op_847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_847_cast_fp16 = einsum(equation = var_847_equation_0, values = (var_757_cast_fp16_4, var_822_cast_fp16))[name = tensor<string, []>("op_847_cast_fp16")];
+            tensor<string, []> var_849_equation_0 = const()[name = tensor<string, []>("op_849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_849_cast_fp16 = einsum(equation = var_849_equation_0, values = (var_757_cast_fp16_5, var_823_cast_fp16))[name = tensor<string, []>("op_849_cast_fp16")];
+            tensor<string, []> var_851_equation_0 = const()[name = tensor<string, []>("op_851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_851_cast_fp16 = einsum(equation = var_851_equation_0, values = (var_757_cast_fp16_6, var_824_cast_fp16))[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<string, []> var_853_equation_0 = const()[name = tensor<string, []>("op_853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_853_cast_fp16 = einsum(equation = var_853_equation_0, values = (var_757_cast_fp16_7, var_825_cast_fp16))[name = tensor<string, []>("op_853_cast_fp16")];
+            tensor<string, []> var_855_equation_0 = const()[name = tensor<string, []>("op_855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_855_cast_fp16 = einsum(equation = var_855_equation_0, values = (var_757_cast_fp16_8, var_826_cast_fp16))[name = tensor<string, []>("op_855_cast_fp16")];
+            tensor<string, []> var_857_equation_0 = const()[name = tensor<string, []>("op_857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_857_cast_fp16 = einsum(equation = var_857_equation_0, values = (var_757_cast_fp16_9, var_827_cast_fp16))[name = tensor<string, []>("op_857_cast_fp16")];
+            tensor<string, []> var_859_equation_0 = const()[name = tensor<string, []>("op_859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_859_cast_fp16 = einsum(equation = var_859_equation_0, values = (var_757_cast_fp16_10, var_828_cast_fp16))[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<string, []> var_861_equation_0 = const()[name = tensor<string, []>("op_861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_861_cast_fp16 = einsum(equation = var_861_equation_0, values = (var_757_cast_fp16_11, var_829_cast_fp16))[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<string, []> var_863_equation_0 = const()[name = tensor<string, []>("op_863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_863_cast_fp16 = einsum(equation = var_863_equation_0, values = (var_757_cast_fp16_12, var_830_cast_fp16))[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<string, []> var_865_equation_0 = const()[name = tensor<string, []>("op_865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_865_cast_fp16 = einsum(equation = var_865_equation_0, values = (var_757_cast_fp16_13, var_831_cast_fp16))[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<string, []> var_867_equation_0 = const()[name = tensor<string, []>("op_867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_867_cast_fp16 = einsum(equation = var_867_equation_0, values = (var_757_cast_fp16_14, var_832_cast_fp16))[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<string, []> var_869_equation_0 = const()[name = tensor<string, []>("op_869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_869_cast_fp16 = einsum(equation = var_869_equation_0, values = (var_757_cast_fp16_15, var_833_cast_fp16))[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_757_cast_fp16_16, var_834_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<string, []> var_873_equation_0 = const()[name = tensor<string, []>("op_873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_873_cast_fp16 = einsum(equation = var_873_equation_0, values = (var_757_cast_fp16_17, var_835_cast_fp16))[name = tensor<string, []>("op_873_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_757_cast_fp16_18, var_836_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<string, []> var_877_equation_0 = const()[name = tensor<string, []>("op_877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_877_cast_fp16 = einsum(equation = var_877_equation_0, values = (var_757_cast_fp16_19, var_837_cast_fp16))[name = tensor<string, []>("op_877_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_25_cast_fp16 = concat(axis = var_662, interleave = input_25_interleave_0, values = (var_839_cast_fp16, var_841_cast_fp16, var_843_cast_fp16, var_845_cast_fp16, var_847_cast_fp16, var_849_cast_fp16, var_851_cast_fp16, var_853_cast_fp16, var_855_cast_fp16, var_857_cast_fp16, var_859_cast_fp16, var_861_cast_fp16, var_863_cast_fp16, var_865_cast_fp16, var_867_cast_fp16, var_869_cast_fp16, var_871_cast_fp16, var_873_cast_fp16, var_875_cast_fp16, var_877_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_886_pad_type_0 = const()[name = tensor<string, []>("op_886_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_886_strides_0 = const()[name = tensor<string, []>("op_886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_886_pad_0 = const()[name = tensor<string, []>("op_886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_886_dilations_0 = const()[name = tensor<string, []>("op_886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_886_groups_0 = const()[name = tensor<string, []>("op_886_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102837952)))];
+            tensor<fp16, [1280]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106114816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_886_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_886_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106117440)))];
+            tensor<fp16, [1280]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106120064)))];
+            tensor<fp16, []> var_896_to_fp16 = const()[name = tensor<string, []>("op_896_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_896_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106122688)))];
+            tensor<fp16, [5120]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119229952)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_922_pad_type_0 = const()[name = tensor<string, []>("op_922_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_922_strides_0 = const()[name = tensor<string, []>("op_922_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_922_pad_0 = const()[name = tensor<string, []>("op_922_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_922_dilations_0 = const()[name = tensor<string, []>("op_922_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_922_groups_0 = const()[name = tensor<string, []>("op_922_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119240256)))];
+            tensor<fp16, [1280]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132347520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_922_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_922_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_922_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_931 = const()[name = tensor<string, []>("op_931"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132350144)))];
+            tensor<fp16, [1280]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132352768)))];
+            tensor<fp16, []> var_947_to_fp16 = const()[name = tensor<string, []>("op_947_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_947_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_982_weight_0_to_fp16 = const()[name = tensor<string, []>("op_982_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132355392)))];
+            tensor<fp16, [1280]> var_982_bias_0_to_fp16 = const()[name = tensor<string, []>("op_982_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135632256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_982_cast_fp16 = conv(bias = var_982_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_982_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135634880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_980_pad_type_0 = const()[name = tensor<string, []>("op_980_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_980_strides_0 = const()[name = tensor<string, []>("op_980_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_980_pad_0 = const()[name = tensor<string, []>("op_980_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_980_dilations_0 = const()[name = tensor<string, []>("op_980_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_980_groups_0 = const()[name = tensor<string, []>("op_980_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138911744)))];
+            tensor<fp16, [1280]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142188608)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_980_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_980_dilations_0, groups = var_980_groups_0, pad = var_980_pad_0, pad_type = var_980_pad_type_0, strides = var_980_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<int32, [20]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_983_axis_0 = const()[name = tensor<string, []>("op_983_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_19 = split(axis = var_983_axis_0, split_sizes = tile_9, x = var_982_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<int32, [4]> var_1004_perm_0 = const()[name = tensor<string, []>("op_1004_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1005_axis_0 = const()[name = tensor<string, []>("op_1005_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1004_cast_fp16 = transpose(perm = var_1004_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_29")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_19 = split(axis = var_1005_axis_0, split_sizes = tile_10, x = var_1004_cast_fp16)[name = tensor<string, []>("op_1005_cast_fp16")];
+            tensor<int32, [20]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1026_axis_0 = const()[name = tensor<string, []>("op_1026_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_19 = split(axis = var_1026_axis_0, split_sizes = tile_11, x = var_980_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_1005_cast_fp16_0, var_983_cast_fp16_0))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_1005_cast_fp16_1, var_983_cast_fp16_1))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_1005_cast_fp16_2, var_983_cast_fp16_2))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_1005_cast_fp16_3, var_983_cast_fp16_3))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1005_cast_fp16_4, var_983_cast_fp16_4))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1005_cast_fp16_5, var_983_cast_fp16_5))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1005_cast_fp16_6, var_983_cast_fp16_6))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1005_cast_fp16_7, var_983_cast_fp16_7))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1005_cast_fp16_8, var_983_cast_fp16_8))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1005_cast_fp16_9, var_983_cast_fp16_9))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1005_cast_fp16_10, var_983_cast_fp16_10))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1005_cast_fp16_11, var_983_cast_fp16_11))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1005_cast_fp16_12, var_983_cast_fp16_12))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1005_cast_fp16_13, var_983_cast_fp16_13))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1005_cast_fp16_14, var_983_cast_fp16_14))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1005_cast_fp16_15, var_983_cast_fp16_15))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1005_cast_fp16_16, var_983_cast_fp16_16))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1005_cast_fp16_17, var_983_cast_fp16_17))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1005_cast_fp16_18, var_983_cast_fp16_18))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1005_cast_fp16_19, var_983_cast_fp16_19))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1087_cast_fp16 = softmax(axis = var_931, x = aw_121_cast_fp16)[name = tensor<string, []>("op_1087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1088_cast_fp16 = softmax(axis = var_931, x = aw_123_cast_fp16)[name = tensor<string, []>("op_1088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1089_cast_fp16 = softmax(axis = var_931, x = aw_125_cast_fp16)[name = tensor<string, []>("op_1089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1090_cast_fp16 = softmax(axis = var_931, x = aw_127_cast_fp16)[name = tensor<string, []>("op_1090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1091_cast_fp16 = softmax(axis = var_931, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1092_cast_fp16 = softmax(axis = var_931, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1093_cast_fp16 = softmax(axis = var_931, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1094_cast_fp16 = softmax(axis = var_931, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1095_cast_fp16 = softmax(axis = var_931, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1096_cast_fp16 = softmax(axis = var_931, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1097_cast_fp16 = softmax(axis = var_931, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1098_cast_fp16 = softmax(axis = var_931, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1099_cast_fp16 = softmax(axis = var_931, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1100_cast_fp16 = softmax(axis = var_931, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1101_cast_fp16 = softmax(axis = var_931, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1102_cast_fp16 = softmax(axis = var_931, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1102_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1103_cast_fp16 = softmax(axis = var_931, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1103_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1104_cast_fp16 = softmax(axis = var_931, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1104_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1105_cast_fp16 = softmax(axis = var_931, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1106_cast_fp16 = softmax(axis = var_931, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1106_cast_fp16")];
+            tensor<string, []> var_1108_equation_0 = const()[name = tensor<string, []>("op_1108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_1026_cast_fp16_0, var_1087_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<string, []> var_1110_equation_0 = const()[name = tensor<string, []>("op_1110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1110_cast_fp16 = einsum(equation = var_1110_equation_0, values = (var_1026_cast_fp16_1, var_1088_cast_fp16))[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<string, []> var_1112_equation_0 = const()[name = tensor<string, []>("op_1112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_1026_cast_fp16_2, var_1089_cast_fp16))[name = tensor<string, []>("op_1112_cast_fp16")];
+            tensor<string, []> var_1114_equation_0 = const()[name = tensor<string, []>("op_1114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1114_cast_fp16 = einsum(equation = var_1114_equation_0, values = (var_1026_cast_fp16_3, var_1090_cast_fp16))[name = tensor<string, []>("op_1114_cast_fp16")];
+            tensor<string, []> var_1116_equation_0 = const()[name = tensor<string, []>("op_1116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_1026_cast_fp16_4, var_1091_cast_fp16))[name = tensor<string, []>("op_1116_cast_fp16")];
+            tensor<string, []> var_1118_equation_0 = const()[name = tensor<string, []>("op_1118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1118_cast_fp16 = einsum(equation = var_1118_equation_0, values = (var_1026_cast_fp16_5, var_1092_cast_fp16))[name = tensor<string, []>("op_1118_cast_fp16")];
+            tensor<string, []> var_1120_equation_0 = const()[name = tensor<string, []>("op_1120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_1026_cast_fp16_6, var_1093_cast_fp16))[name = tensor<string, []>("op_1120_cast_fp16")];
+            tensor<string, []> var_1122_equation_0 = const()[name = tensor<string, []>("op_1122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1122_cast_fp16 = einsum(equation = var_1122_equation_0, values = (var_1026_cast_fp16_7, var_1094_cast_fp16))[name = tensor<string, []>("op_1122_cast_fp16")];
+            tensor<string, []> var_1124_equation_0 = const()[name = tensor<string, []>("op_1124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_1026_cast_fp16_8, var_1095_cast_fp16))[name = tensor<string, []>("op_1124_cast_fp16")];
+            tensor<string, []> var_1126_equation_0 = const()[name = tensor<string, []>("op_1126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1126_cast_fp16 = einsum(equation = var_1126_equation_0, values = (var_1026_cast_fp16_9, var_1096_cast_fp16))[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<string, []> var_1128_equation_0 = const()[name = tensor<string, []>("op_1128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1128_cast_fp16 = einsum(equation = var_1128_equation_0, values = (var_1026_cast_fp16_10, var_1097_cast_fp16))[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<string, []> var_1130_equation_0 = const()[name = tensor<string, []>("op_1130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1130_cast_fp16 = einsum(equation = var_1130_equation_0, values = (var_1026_cast_fp16_11, var_1098_cast_fp16))[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<string, []> var_1132_equation_0 = const()[name = tensor<string, []>("op_1132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1132_cast_fp16 = einsum(equation = var_1132_equation_0, values = (var_1026_cast_fp16_12, var_1099_cast_fp16))[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<string, []> var_1134_equation_0 = const()[name = tensor<string, []>("op_1134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1134_cast_fp16 = einsum(equation = var_1134_equation_0, values = (var_1026_cast_fp16_13, var_1100_cast_fp16))[name = tensor<string, []>("op_1134_cast_fp16")];
+            tensor<string, []> var_1136_equation_0 = const()[name = tensor<string, []>("op_1136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1136_cast_fp16 = einsum(equation = var_1136_equation_0, values = (var_1026_cast_fp16_14, var_1101_cast_fp16))[name = tensor<string, []>("op_1136_cast_fp16")];
+            tensor<string, []> var_1138_equation_0 = const()[name = tensor<string, []>("op_1138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1138_cast_fp16 = einsum(equation = var_1138_equation_0, values = (var_1026_cast_fp16_15, var_1102_cast_fp16))[name = tensor<string, []>("op_1138_cast_fp16")];
+            tensor<string, []> var_1140_equation_0 = const()[name = tensor<string, []>("op_1140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1140_cast_fp16 = einsum(equation = var_1140_equation_0, values = (var_1026_cast_fp16_16, var_1103_cast_fp16))[name = tensor<string, []>("op_1140_cast_fp16")];
+            tensor<string, []> var_1142_equation_0 = const()[name = tensor<string, []>("op_1142_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1142_cast_fp16 = einsum(equation = var_1142_equation_0, values = (var_1026_cast_fp16_17, var_1104_cast_fp16))[name = tensor<string, []>("op_1142_cast_fp16")];
+            tensor<string, []> var_1144_equation_0 = const()[name = tensor<string, []>("op_1144_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1144_cast_fp16 = einsum(equation = var_1144_equation_0, values = (var_1026_cast_fp16_18, var_1105_cast_fp16))[name = tensor<string, []>("op_1144_cast_fp16")];
+            tensor<string, []> var_1146_equation_0 = const()[name = tensor<string, []>("op_1146_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1146_cast_fp16 = einsum(equation = var_1146_equation_0, values = (var_1026_cast_fp16_19, var_1106_cast_fp16))[name = tensor<string, []>("op_1146_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_35_cast_fp16 = concat(axis = var_931, interleave = input_35_interleave_0, values = (var_1108_cast_fp16, var_1110_cast_fp16, var_1112_cast_fp16, var_1114_cast_fp16, var_1116_cast_fp16, var_1118_cast_fp16, var_1120_cast_fp16, var_1122_cast_fp16, var_1124_cast_fp16, var_1126_cast_fp16, var_1128_cast_fp16, var_1130_cast_fp16, var_1132_cast_fp16, var_1134_cast_fp16, var_1136_cast_fp16, var_1138_cast_fp16, var_1140_cast_fp16, var_1142_cast_fp16, var_1144_cast_fp16, var_1146_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_1155_pad_type_0 = const()[name = tensor<string, []>("op_1155_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1155_strides_0 = const()[name = tensor<string, []>("op_1155_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1155_pad_0 = const()[name = tensor<string, []>("op_1155_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1155_dilations_0 = const()[name = tensor<string, []>("op_1155_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1155_groups_0 = const()[name = tensor<string, []>("op_1155_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142191232)))];
+            tensor<fp16, [1280]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145468096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1155_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_1155_dilations_0, groups = var_1155_groups_0, pad = var_1155_pad_0, pad_type = var_1155_pad_type_0, strides = var_1155_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_1155_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_1155_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145470720)))];
+            tensor<fp16, [1280]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145473344)))];
+            tensor<fp16, []> var_1165_to_fp16 = const()[name = tensor<string, []>("op_1165_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_1165_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145475968)))];
+            tensor<fp16, [5120]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158583232)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_1191_pad_type_0 = const()[name = tensor<string, []>("op_1191_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1191_strides_0 = const()[name = tensor<string, []>("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1191_pad_0 = const()[name = tensor<string, []>("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1191_dilations_0 = const()[name = tensor<string, []>("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1191_groups_0 = const()[name = tensor<string, []>("op_1191_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158593536)))];
+            tensor<fp16, [1280]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171700800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1191_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_1191_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_1191_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_1200 = const()[name = tensor<string, []>("op_1200"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171703424)))];
+            tensor<fp16, [1280]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171706048)))];
+            tensor<fp16, []> var_1216_to_fp16 = const()[name = tensor<string, []>("op_1216_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_1216_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1251_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1251_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171708672)))];
+            tensor<fp16, [1280]> var_1251_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1251_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174985536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1251_cast_fp16 = conv(bias = var_1251_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_1251_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1251_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174988160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_1249_pad_type_0 = const()[name = tensor<string, []>("op_1249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1249_strides_0 = const()[name = tensor<string, []>("op_1249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1249_pad_0 = const()[name = tensor<string, []>("op_1249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1249_dilations_0 = const()[name = tensor<string, []>("op_1249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1249_groups_0 = const()[name = tensor<string, []>("op_1249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(178265024)))];
+            tensor<fp16, [1280]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181541888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1249_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_1249_dilations_0, groups = var_1249_groups_0, pad = var_1249_pad_0, pad_type = var_1249_pad_type_0, strides = var_1249_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1249_cast_fp16")];
+            tensor<int32, [20]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1252_axis_0 = const()[name = tensor<string, []>("op_1252_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_19 = split(axis = var_1252_axis_0, split_sizes = tile_12, x = var_1251_cast_fp16)[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<int32, [4]> var_1273_perm_0 = const()[name = tensor<string, []>("op_1273_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1274_axis_0 = const()[name = tensor<string, []>("op_1274_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1273_cast_fp16 = transpose(perm = var_1273_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_28")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_19 = split(axis = var_1274_axis_0, split_sizes = tile_13, x = var_1273_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
+            tensor<int32, [20]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1295_axis_0 = const()[name = tensor<string, []>("op_1295_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_19 = split(axis = var_1295_axis_0, split_sizes = tile_14, x = var_1249_cast_fp16)[name = tensor<string, []>("op_1295_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1274_cast_fp16_0, var_1252_cast_fp16_0))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1274_cast_fp16_1, var_1252_cast_fp16_1))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1274_cast_fp16_2, var_1252_cast_fp16_2))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1274_cast_fp16_3, var_1252_cast_fp16_3))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1274_cast_fp16_4, var_1252_cast_fp16_4))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1274_cast_fp16_5, var_1252_cast_fp16_5))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1274_cast_fp16_6, var_1252_cast_fp16_6))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1274_cast_fp16_7, var_1252_cast_fp16_7))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1274_cast_fp16_8, var_1252_cast_fp16_8))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1274_cast_fp16_9, var_1252_cast_fp16_9))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1274_cast_fp16_10, var_1252_cast_fp16_10))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1274_cast_fp16_11, var_1252_cast_fp16_11))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1274_cast_fp16_12, var_1252_cast_fp16_12))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1274_cast_fp16_13, var_1252_cast_fp16_13))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1274_cast_fp16_14, var_1252_cast_fp16_14))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1274_cast_fp16_15, var_1252_cast_fp16_15))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1274_cast_fp16_16, var_1252_cast_fp16_16))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1274_cast_fp16_17, var_1252_cast_fp16_17))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1274_cast_fp16_18, var_1252_cast_fp16_18))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1274_cast_fp16_19, var_1252_cast_fp16_19))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1356_cast_fp16 = softmax(axis = var_1200, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1357_cast_fp16 = softmax(axis = var_1200, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1358_cast_fp16 = softmax(axis = var_1200, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1359_cast_fp16 = softmax(axis = var_1200, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1360_cast_fp16 = softmax(axis = var_1200, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1361_cast_fp16 = softmax(axis = var_1200, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1362_cast_fp16 = softmax(axis = var_1200, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1363_cast_fp16 = softmax(axis = var_1200, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1364_cast_fp16 = softmax(axis = var_1200, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1365_cast_fp16 = softmax(axis = var_1200, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1366_cast_fp16 = softmax(axis = var_1200, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1367_cast_fp16 = softmax(axis = var_1200, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1368_cast_fp16 = softmax(axis = var_1200, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1369_cast_fp16 = softmax(axis = var_1200, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1369_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1370_cast_fp16 = softmax(axis = var_1200, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1370_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1371_cast_fp16 = softmax(axis = var_1200, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1371_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1372_cast_fp16 = softmax(axis = var_1200, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1372_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1373_cast_fp16 = softmax(axis = var_1200, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1373_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1374_cast_fp16 = softmax(axis = var_1200, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1374_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1375_cast_fp16 = softmax(axis = var_1200, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1375_cast_fp16")];
+            tensor<string, []> var_1377_equation_0 = const()[name = tensor<string, []>("op_1377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1377_cast_fp16 = einsum(equation = var_1377_equation_0, values = (var_1295_cast_fp16_0, var_1356_cast_fp16))[name = tensor<string, []>("op_1377_cast_fp16")];
+            tensor<string, []> var_1379_equation_0 = const()[name = tensor<string, []>("op_1379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1379_cast_fp16 = einsum(equation = var_1379_equation_0, values = (var_1295_cast_fp16_1, var_1357_cast_fp16))[name = tensor<string, []>("op_1379_cast_fp16")];
+            tensor<string, []> var_1381_equation_0 = const()[name = tensor<string, []>("op_1381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1381_cast_fp16 = einsum(equation = var_1381_equation_0, values = (var_1295_cast_fp16_2, var_1358_cast_fp16))[name = tensor<string, []>("op_1381_cast_fp16")];
+            tensor<string, []> var_1383_equation_0 = const()[name = tensor<string, []>("op_1383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1383_cast_fp16 = einsum(equation = var_1383_equation_0, values = (var_1295_cast_fp16_3, var_1359_cast_fp16))[name = tensor<string, []>("op_1383_cast_fp16")];
+            tensor<string, []> var_1385_equation_0 = const()[name = tensor<string, []>("op_1385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1385_cast_fp16 = einsum(equation = var_1385_equation_0, values = (var_1295_cast_fp16_4, var_1360_cast_fp16))[name = tensor<string, []>("op_1385_cast_fp16")];
+            tensor<string, []> var_1387_equation_0 = const()[name = tensor<string, []>("op_1387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1387_cast_fp16 = einsum(equation = var_1387_equation_0, values = (var_1295_cast_fp16_5, var_1361_cast_fp16))[name = tensor<string, []>("op_1387_cast_fp16")];
+            tensor<string, []> var_1389_equation_0 = const()[name = tensor<string, []>("op_1389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1389_cast_fp16 = einsum(equation = var_1389_equation_0, values = (var_1295_cast_fp16_6, var_1362_cast_fp16))[name = tensor<string, []>("op_1389_cast_fp16")];
+            tensor<string, []> var_1391_equation_0 = const()[name = tensor<string, []>("op_1391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1391_cast_fp16 = einsum(equation = var_1391_equation_0, values = (var_1295_cast_fp16_7, var_1363_cast_fp16))[name = tensor<string, []>("op_1391_cast_fp16")];
+            tensor<string, []> var_1393_equation_0 = const()[name = tensor<string, []>("op_1393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16 = einsum(equation = var_1393_equation_0, values = (var_1295_cast_fp16_8, var_1364_cast_fp16))[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<string, []> var_1395_equation_0 = const()[name = tensor<string, []>("op_1395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1395_cast_fp16 = einsum(equation = var_1395_equation_0, values = (var_1295_cast_fp16_9, var_1365_cast_fp16))[name = tensor<string, []>("op_1395_cast_fp16")];
+            tensor<string, []> var_1397_equation_0 = const()[name = tensor<string, []>("op_1397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1397_cast_fp16 = einsum(equation = var_1397_equation_0, values = (var_1295_cast_fp16_10, var_1366_cast_fp16))[name = tensor<string, []>("op_1397_cast_fp16")];
+            tensor<string, []> var_1399_equation_0 = const()[name = tensor<string, []>("op_1399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1399_cast_fp16 = einsum(equation = var_1399_equation_0, values = (var_1295_cast_fp16_11, var_1367_cast_fp16))[name = tensor<string, []>("op_1399_cast_fp16")];
+            tensor<string, []> var_1401_equation_0 = const()[name = tensor<string, []>("op_1401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1401_cast_fp16 = einsum(equation = var_1401_equation_0, values = (var_1295_cast_fp16_12, var_1368_cast_fp16))[name = tensor<string, []>("op_1401_cast_fp16")];
+            tensor<string, []> var_1403_equation_0 = const()[name = tensor<string, []>("op_1403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1403_cast_fp16 = einsum(equation = var_1403_equation_0, values = (var_1295_cast_fp16_13, var_1369_cast_fp16))[name = tensor<string, []>("op_1403_cast_fp16")];
+            tensor<string, []> var_1405_equation_0 = const()[name = tensor<string, []>("op_1405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1405_cast_fp16 = einsum(equation = var_1405_equation_0, values = (var_1295_cast_fp16_14, var_1370_cast_fp16))[name = tensor<string, []>("op_1405_cast_fp16")];
+            tensor<string, []> var_1407_equation_0 = const()[name = tensor<string, []>("op_1407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1407_cast_fp16 = einsum(equation = var_1407_equation_0, values = (var_1295_cast_fp16_15, var_1371_cast_fp16))[name = tensor<string, []>("op_1407_cast_fp16")];
+            tensor<string, []> var_1409_equation_0 = const()[name = tensor<string, []>("op_1409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1409_cast_fp16 = einsum(equation = var_1409_equation_0, values = (var_1295_cast_fp16_16, var_1372_cast_fp16))[name = tensor<string, []>("op_1409_cast_fp16")];
+            tensor<string, []> var_1411_equation_0 = const()[name = tensor<string, []>("op_1411_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1411_cast_fp16 = einsum(equation = var_1411_equation_0, values = (var_1295_cast_fp16_17, var_1373_cast_fp16))[name = tensor<string, []>("op_1411_cast_fp16")];
+            tensor<string, []> var_1413_equation_0 = const()[name = tensor<string, []>("op_1413_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1413_cast_fp16 = einsum(equation = var_1413_equation_0, values = (var_1295_cast_fp16_18, var_1374_cast_fp16))[name = tensor<string, []>("op_1413_cast_fp16")];
+            tensor<string, []> var_1415_equation_0 = const()[name = tensor<string, []>("op_1415_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1415_cast_fp16 = einsum(equation = var_1415_equation_0, values = (var_1295_cast_fp16_19, var_1375_cast_fp16))[name = tensor<string, []>("op_1415_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_45_cast_fp16 = concat(axis = var_1200, interleave = input_45_interleave_0, values = (var_1377_cast_fp16, var_1379_cast_fp16, var_1381_cast_fp16, var_1383_cast_fp16, var_1385_cast_fp16, var_1387_cast_fp16, var_1389_cast_fp16, var_1391_cast_fp16, var_1393_cast_fp16, var_1395_cast_fp16, var_1397_cast_fp16, var_1399_cast_fp16, var_1401_cast_fp16, var_1403_cast_fp16, var_1405_cast_fp16, var_1407_cast_fp16, var_1409_cast_fp16, var_1411_cast_fp16, var_1413_cast_fp16, var_1415_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1424_pad_type_0 = const()[name = tensor<string, []>("op_1424_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1424_strides_0 = const()[name = tensor<string, []>("op_1424_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1424_pad_0 = const()[name = tensor<string, []>("op_1424_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1424_dilations_0 = const()[name = tensor<string, []>("op_1424_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1424_groups_0 = const()[name = tensor<string, []>("op_1424_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181544512)))];
+            tensor<fp16, [1280]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184821376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1424_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1424_dilations_0, groups = var_1424_groups_0, pad = var_1424_pad_0, pad_type = var_1424_pad_type_0, strides = var_1424_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1424_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1424_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184824000)))];
+            tensor<fp16, [1280]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184826624)))];
+            tensor<fp16, []> var_1434_to_fp16 = const()[name = tensor<string, []>("op_1434_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1434_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184829248)))];
+            tensor<fp16, [5120]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197936512)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1460_pad_type_0 = const()[name = tensor<string, []>("op_1460_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1460_strides_0 = const()[name = tensor<string, []>("op_1460_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1460_pad_0 = const()[name = tensor<string, []>("op_1460_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1460_dilations_0 = const()[name = tensor<string, []>("op_1460_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1460_groups_0 = const()[name = tensor<string, []>("op_1460_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197946816)))];
+            tensor<fp16, [1280]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211054080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1460_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1460_dilations_0, groups = var_1460_groups_0, pad = var_1460_pad_0, pad_type = var_1460_pad_type_0, strides = var_1460_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1460_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1469 = const()[name = tensor<string, []>("op_1469"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211056704)))];
+            tensor<fp16, [1280]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211059328)))];
+            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1485_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1520_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1520_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211061952)))];
+            tensor<fp16, [1280]> var_1520_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1520_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214338816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1520_cast_fp16 = conv(bias = var_1520_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1520_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1520_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214341440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1518_pad_type_0 = const()[name = tensor<string, []>("op_1518_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1518_strides_0 = const()[name = tensor<string, []>("op_1518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1518_pad_0 = const()[name = tensor<string, []>("op_1518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1518_dilations_0 = const()[name = tensor<string, []>("op_1518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1518_groups_0 = const()[name = tensor<string, []>("op_1518_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217618304)))];
+            tensor<fp16, [1280]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220895168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1518_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1518_dilations_0, groups = var_1518_groups_0, pad = var_1518_pad_0, pad_type = var_1518_pad_type_0, strides = var_1518_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1518_cast_fp16")];
+            tensor<int32, [20]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1521_axis_0 = const()[name = tensor<string, []>("op_1521_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_19 = split(axis = var_1521_axis_0, split_sizes = tile_15, x = var_1520_cast_fp16)[name = tensor<string, []>("op_1521_cast_fp16")];
+            tensor<int32, [4]> var_1542_perm_0 = const()[name = tensor<string, []>("op_1542_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1543_axis_0 = const()[name = tensor<string, []>("op_1543_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1542_cast_fp16 = transpose(perm = var_1542_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_27")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_19 = split(axis = var_1543_axis_0, split_sizes = tile_16, x = var_1542_cast_fp16)[name = tensor<string, []>("op_1543_cast_fp16")];
+            tensor<int32, [20]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1564_axis_0 = const()[name = tensor<string, []>("op_1564_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_19 = split(axis = var_1564_axis_0, split_sizes = tile_17, x = var_1518_cast_fp16)[name = tensor<string, []>("op_1564_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1543_cast_fp16_0, var_1521_cast_fp16_0))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1543_cast_fp16_1, var_1521_cast_fp16_1))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1543_cast_fp16_2, var_1521_cast_fp16_2))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1543_cast_fp16_3, var_1521_cast_fp16_3))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1543_cast_fp16_4, var_1521_cast_fp16_4))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1543_cast_fp16_5, var_1521_cast_fp16_5))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1543_cast_fp16_6, var_1521_cast_fp16_6))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1543_cast_fp16_7, var_1521_cast_fp16_7))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1543_cast_fp16_8, var_1521_cast_fp16_8))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1543_cast_fp16_9, var_1521_cast_fp16_9))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1543_cast_fp16_10, var_1521_cast_fp16_10))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1543_cast_fp16_11, var_1521_cast_fp16_11))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1543_cast_fp16_12, var_1521_cast_fp16_12))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1543_cast_fp16_13, var_1521_cast_fp16_13))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1543_cast_fp16_14, var_1521_cast_fp16_14))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1543_cast_fp16_15, var_1521_cast_fp16_15))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1543_cast_fp16_16, var_1521_cast_fp16_16))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1543_cast_fp16_17, var_1521_cast_fp16_17))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1543_cast_fp16_18, var_1521_cast_fp16_18))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1543_cast_fp16_19, var_1521_cast_fp16_19))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1625_cast_fp16 = softmax(axis = var_1469, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1626_cast_fp16 = softmax(axis = var_1469, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1627_cast_fp16 = softmax(axis = var_1469, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1628_cast_fp16 = softmax(axis = var_1469, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1629_cast_fp16 = softmax(axis = var_1469, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1630_cast_fp16 = softmax(axis = var_1469, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1631_cast_fp16 = softmax(axis = var_1469, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1632_cast_fp16 = softmax(axis = var_1469, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1633_cast_fp16 = softmax(axis = var_1469, x = aw_217_cast_fp16)[name = tensor<string, []>("op_1633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1634_cast_fp16 = softmax(axis = var_1469, x = aw_219_cast_fp16)[name = tensor<string, []>("op_1634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1635_cast_fp16 = softmax(axis = var_1469, x = aw_221_cast_fp16)[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1636_cast_fp16 = softmax(axis = var_1469, x = aw_223_cast_fp16)[name = tensor<string, []>("op_1636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1637_cast_fp16 = softmax(axis = var_1469, x = aw_225_cast_fp16)[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1638_cast_fp16 = softmax(axis = var_1469, x = aw_227_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1639_cast_fp16 = softmax(axis = var_1469, x = aw_229_cast_fp16)[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1640_cast_fp16 = softmax(axis = var_1469, x = aw_231_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1641_cast_fp16 = softmax(axis = var_1469, x = aw_233_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1642_cast_fp16 = softmax(axis = var_1469, x = aw_235_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1643_cast_fp16 = softmax(axis = var_1469, x = aw_237_cast_fp16)[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1644_cast_fp16 = softmax(axis = var_1469, x = aw_239_cast_fp16)[name = tensor<string, []>("op_1644_cast_fp16")];
+            tensor<string, []> var_1646_equation_0 = const()[name = tensor<string, []>("op_1646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1564_cast_fp16_0, var_1625_cast_fp16))[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1564_cast_fp16_1, var_1626_cast_fp16))[name = tensor<string, []>("op_1648_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1564_cast_fp16_2, var_1627_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1564_cast_fp16_3, var_1628_cast_fp16))[name = tensor<string, []>("op_1652_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1564_cast_fp16_4, var_1629_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1564_cast_fp16_5, var_1630_cast_fp16))[name = tensor<string, []>("op_1656_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1564_cast_fp16_6, var_1631_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1564_cast_fp16_7, var_1632_cast_fp16))[name = tensor<string, []>("op_1660_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1564_cast_fp16_8, var_1633_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1564_cast_fp16_9, var_1634_cast_fp16))[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1564_cast_fp16_10, var_1635_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1564_cast_fp16_11, var_1636_cast_fp16))[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1564_cast_fp16_12, var_1637_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<string, []> var_1672_equation_0 = const()[name = tensor<string, []>("op_1672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1672_cast_fp16 = einsum(equation = var_1672_equation_0, values = (var_1564_cast_fp16_13, var_1638_cast_fp16))[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<string, []> var_1674_equation_0 = const()[name = tensor<string, []>("op_1674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1564_cast_fp16_14, var_1639_cast_fp16))[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<string, []> var_1676_equation_0 = const()[name = tensor<string, []>("op_1676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1676_cast_fp16 = einsum(equation = var_1676_equation_0, values = (var_1564_cast_fp16_15, var_1640_cast_fp16))[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<string, []> var_1678_equation_0 = const()[name = tensor<string, []>("op_1678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1564_cast_fp16_16, var_1641_cast_fp16))[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<string, []> var_1680_equation_0 = const()[name = tensor<string, []>("op_1680_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1680_cast_fp16 = einsum(equation = var_1680_equation_0, values = (var_1564_cast_fp16_17, var_1642_cast_fp16))[name = tensor<string, []>("op_1680_cast_fp16")];
+            tensor<string, []> var_1682_equation_0 = const()[name = tensor<string, []>("op_1682_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1564_cast_fp16_18, var_1643_cast_fp16))[name = tensor<string, []>("op_1682_cast_fp16")];
+            tensor<string, []> var_1684_equation_0 = const()[name = tensor<string, []>("op_1684_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1684_cast_fp16 = einsum(equation = var_1684_equation_0, values = (var_1564_cast_fp16_19, var_1644_cast_fp16))[name = tensor<string, []>("op_1684_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1469, interleave = input_55_interleave_0, values = (var_1646_cast_fp16, var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16, var_1668_cast_fp16, var_1670_cast_fp16, var_1672_cast_fp16, var_1674_cast_fp16, var_1676_cast_fp16, var_1678_cast_fp16, var_1680_cast_fp16, var_1682_cast_fp16, var_1684_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1693_pad_type_0 = const()[name = tensor<string, []>("op_1693_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1693_strides_0 = const()[name = tensor<string, []>("op_1693_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1693_pad_0 = const()[name = tensor<string, []>("op_1693_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1693_dilations_0 = const()[name = tensor<string, []>("op_1693_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1693_groups_0 = const()[name = tensor<string, []>("op_1693_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220897792)))];
+            tensor<fp16, [1280]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224174656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1693_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1693_dilations_0, groups = var_1693_groups_0, pad = var_1693_pad_0, pad_type = var_1693_pad_type_0, strides = var_1693_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1693_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1693_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224177280)))];
+            tensor<fp16, [1280]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224179904)))];
+            tensor<fp16, []> var_1703_to_fp16 = const()[name = tensor<string, []>("op_1703_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1703_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224182528)))];
+            tensor<fp16, [5120]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237289792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1729_pad_type_0 = const()[name = tensor<string, []>("op_1729_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1729_strides_0 = const()[name = tensor<string, []>("op_1729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1729_pad_0 = const()[name = tensor<string, []>("op_1729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1729_dilations_0 = const()[name = tensor<string, []>("op_1729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1729_groups_0 = const()[name = tensor<string, []>("op_1729_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237300096)))];
+            tensor<fp16, [1280]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250407360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1729_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1729_dilations_0, groups = var_1729_groups_0, pad = var_1729_pad_0, pad_type = var_1729_pad_type_0, strides = var_1729_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1729_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1729_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1738 = const()[name = tensor<string, []>("op_1738"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250409984)))];
+            tensor<fp16, [1280]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250412608)))];
+            tensor<fp16, []> var_1754_to_fp16 = const()[name = tensor<string, []>("op_1754_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1754_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1789_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1789_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250415232)))];
+            tensor<fp16, [1280]> var_1789_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1789_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253692096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1789_cast_fp16 = conv(bias = var_1789_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1789_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1789_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253694720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1787_pad_type_0 = const()[name = tensor<string, []>("op_1787_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1787_strides_0 = const()[name = tensor<string, []>("op_1787_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1787_pad_0 = const()[name = tensor<string, []>("op_1787_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1787_dilations_0 = const()[name = tensor<string, []>("op_1787_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1787_groups_0 = const()[name = tensor<string, []>("op_1787_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(256971584)))];
+            tensor<fp16, [1280]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260248448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1787_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1787_dilations_0, groups = var_1787_groups_0, pad = var_1787_pad_0, pad_type = var_1787_pad_type_0, strides = var_1787_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1787_cast_fp16")];
+            tensor<int32, [20]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1790_axis_0 = const()[name = tensor<string, []>("op_1790_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_19 = split(axis = var_1790_axis_0, split_sizes = tile_18, x = var_1789_cast_fp16)[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<int32, [4]> var_1811_perm_0 = const()[name = tensor<string, []>("op_1811_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1812_axis_0 = const()[name = tensor<string, []>("op_1812_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1811_cast_fp16 = transpose(perm = var_1811_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_26")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_19 = split(axis = var_1812_axis_0, split_sizes = tile_19, x = var_1811_cast_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<int32, [20]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1833_axis_0 = const()[name = tensor<string, []>("op_1833_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_19 = split(axis = var_1833_axis_0, split_sizes = tile_20, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1833_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_1812_cast_fp16_0, var_1790_cast_fp16_0))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_1812_cast_fp16_1, var_1790_cast_fp16_1))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_1812_cast_fp16_2, var_1790_cast_fp16_2))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_1812_cast_fp16_3, var_1790_cast_fp16_3))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_1812_cast_fp16_4, var_1790_cast_fp16_4))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_1812_cast_fp16_5, var_1790_cast_fp16_5))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_1812_cast_fp16_6, var_1790_cast_fp16_6))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_1812_cast_fp16_7, var_1790_cast_fp16_7))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_1812_cast_fp16_8, var_1790_cast_fp16_8))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_1812_cast_fp16_9, var_1790_cast_fp16_9))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_1812_cast_fp16_10, var_1790_cast_fp16_10))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_1812_cast_fp16_11, var_1790_cast_fp16_11))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_1812_cast_fp16_12, var_1790_cast_fp16_12))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_1812_cast_fp16_13, var_1790_cast_fp16_13))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_1812_cast_fp16_14, var_1790_cast_fp16_14))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_1812_cast_fp16_15, var_1790_cast_fp16_15))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_1812_cast_fp16_16, var_1790_cast_fp16_16))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_1812_cast_fp16_17, var_1790_cast_fp16_17))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_1812_cast_fp16_18, var_1790_cast_fp16_18))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_1812_cast_fp16_19, var_1790_cast_fp16_19))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1894_cast_fp16 = softmax(axis = var_1738, x = aw_241_cast_fp16)[name = tensor<string, []>("op_1894_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1895_cast_fp16 = softmax(axis = var_1738, x = aw_243_cast_fp16)[name = tensor<string, []>("op_1895_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1896_cast_fp16 = softmax(axis = var_1738, x = aw_245_cast_fp16)[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1897_cast_fp16 = softmax(axis = var_1738, x = aw_247_cast_fp16)[name = tensor<string, []>("op_1897_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1898_cast_fp16 = softmax(axis = var_1738, x = aw_249_cast_fp16)[name = tensor<string, []>("op_1898_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1899_cast_fp16 = softmax(axis = var_1738, x = aw_251_cast_fp16)[name = tensor<string, []>("op_1899_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1900_cast_fp16 = softmax(axis = var_1738, x = aw_253_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1901_cast_fp16 = softmax(axis = var_1738, x = aw_255_cast_fp16)[name = tensor<string, []>("op_1901_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1902_cast_fp16 = softmax(axis = var_1738, x = aw_257_cast_fp16)[name = tensor<string, []>("op_1902_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1903_cast_fp16 = softmax(axis = var_1738, x = aw_259_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1904_cast_fp16 = softmax(axis = var_1738, x = aw_261_cast_fp16)[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1905_cast_fp16 = softmax(axis = var_1738, x = aw_263_cast_fp16)[name = tensor<string, []>("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1906_cast_fp16 = softmax(axis = var_1738, x = aw_265_cast_fp16)[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1907_cast_fp16 = softmax(axis = var_1738, x = aw_267_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1908_cast_fp16 = softmax(axis = var_1738, x = aw_269_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1909_cast_fp16 = softmax(axis = var_1738, x = aw_271_cast_fp16)[name = tensor<string, []>("op_1909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1910_cast_fp16 = softmax(axis = var_1738, x = aw_273_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1911_cast_fp16 = softmax(axis = var_1738, x = aw_275_cast_fp16)[name = tensor<string, []>("op_1911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1912_cast_fp16 = softmax(axis = var_1738, x = aw_277_cast_fp16)[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1913_cast_fp16 = softmax(axis = var_1738, x = aw_279_cast_fp16)[name = tensor<string, []>("op_1913_cast_fp16")];
+            tensor<string, []> var_1915_equation_0 = const()[name = tensor<string, []>("op_1915_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1915_cast_fp16 = einsum(equation = var_1915_equation_0, values = (var_1833_cast_fp16_0, var_1894_cast_fp16))[name = tensor<string, []>("op_1915_cast_fp16")];
+            tensor<string, []> var_1917_equation_0 = const()[name = tensor<string, []>("op_1917_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1917_cast_fp16 = einsum(equation = var_1917_equation_0, values = (var_1833_cast_fp16_1, var_1895_cast_fp16))[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<string, []> var_1919_equation_0 = const()[name = tensor<string, []>("op_1919_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1919_cast_fp16 = einsum(equation = var_1919_equation_0, values = (var_1833_cast_fp16_2, var_1896_cast_fp16))[name = tensor<string, []>("op_1919_cast_fp16")];
+            tensor<string, []> var_1921_equation_0 = const()[name = tensor<string, []>("op_1921_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1921_cast_fp16 = einsum(equation = var_1921_equation_0, values = (var_1833_cast_fp16_3, var_1897_cast_fp16))[name = tensor<string, []>("op_1921_cast_fp16")];
+            tensor<string, []> var_1923_equation_0 = const()[name = tensor<string, []>("op_1923_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1923_cast_fp16 = einsum(equation = var_1923_equation_0, values = (var_1833_cast_fp16_4, var_1898_cast_fp16))[name = tensor<string, []>("op_1923_cast_fp16")];
+            tensor<string, []> var_1925_equation_0 = const()[name = tensor<string, []>("op_1925_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1925_cast_fp16 = einsum(equation = var_1925_equation_0, values = (var_1833_cast_fp16_5, var_1899_cast_fp16))[name = tensor<string, []>("op_1925_cast_fp16")];
+            tensor<string, []> var_1927_equation_0 = const()[name = tensor<string, []>("op_1927_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1927_cast_fp16 = einsum(equation = var_1927_equation_0, values = (var_1833_cast_fp16_6, var_1900_cast_fp16))[name = tensor<string, []>("op_1927_cast_fp16")];
+            tensor<string, []> var_1929_equation_0 = const()[name = tensor<string, []>("op_1929_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1929_cast_fp16 = einsum(equation = var_1929_equation_0, values = (var_1833_cast_fp16_7, var_1901_cast_fp16))[name = tensor<string, []>("op_1929_cast_fp16")];
+            tensor<string, []> var_1931_equation_0 = const()[name = tensor<string, []>("op_1931_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1931_cast_fp16 = einsum(equation = var_1931_equation_0, values = (var_1833_cast_fp16_8, var_1902_cast_fp16))[name = tensor<string, []>("op_1931_cast_fp16")];
+            tensor<string, []> var_1933_equation_0 = const()[name = tensor<string, []>("op_1933_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1933_cast_fp16 = einsum(equation = var_1933_equation_0, values = (var_1833_cast_fp16_9, var_1903_cast_fp16))[name = tensor<string, []>("op_1933_cast_fp16")];
+            tensor<string, []> var_1935_equation_0 = const()[name = tensor<string, []>("op_1935_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1935_cast_fp16 = einsum(equation = var_1935_equation_0, values = (var_1833_cast_fp16_10, var_1904_cast_fp16))[name = tensor<string, []>("op_1935_cast_fp16")];
+            tensor<string, []> var_1937_equation_0 = const()[name = tensor<string, []>("op_1937_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1937_cast_fp16 = einsum(equation = var_1937_equation_0, values = (var_1833_cast_fp16_11, var_1905_cast_fp16))[name = tensor<string, []>("op_1937_cast_fp16")];
+            tensor<string, []> var_1939_equation_0 = const()[name = tensor<string, []>("op_1939_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1939_cast_fp16 = einsum(equation = var_1939_equation_0, values = (var_1833_cast_fp16_12, var_1906_cast_fp16))[name = tensor<string, []>("op_1939_cast_fp16")];
+            tensor<string, []> var_1941_equation_0 = const()[name = tensor<string, []>("op_1941_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1941_cast_fp16 = einsum(equation = var_1941_equation_0, values = (var_1833_cast_fp16_13, var_1907_cast_fp16))[name = tensor<string, []>("op_1941_cast_fp16")];
+            tensor<string, []> var_1943_equation_0 = const()[name = tensor<string, []>("op_1943_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1943_cast_fp16 = einsum(equation = var_1943_equation_0, values = (var_1833_cast_fp16_14, var_1908_cast_fp16))[name = tensor<string, []>("op_1943_cast_fp16")];
+            tensor<string, []> var_1945_equation_0 = const()[name = tensor<string, []>("op_1945_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1945_cast_fp16 = einsum(equation = var_1945_equation_0, values = (var_1833_cast_fp16_15, var_1909_cast_fp16))[name = tensor<string, []>("op_1945_cast_fp16")];
+            tensor<string, []> var_1947_equation_0 = const()[name = tensor<string, []>("op_1947_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1947_cast_fp16 = einsum(equation = var_1947_equation_0, values = (var_1833_cast_fp16_16, var_1910_cast_fp16))[name = tensor<string, []>("op_1947_cast_fp16")];
+            tensor<string, []> var_1949_equation_0 = const()[name = tensor<string, []>("op_1949_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1949_cast_fp16 = einsum(equation = var_1949_equation_0, values = (var_1833_cast_fp16_17, var_1911_cast_fp16))[name = tensor<string, []>("op_1949_cast_fp16")];
+            tensor<string, []> var_1951_equation_0 = const()[name = tensor<string, []>("op_1951_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1951_cast_fp16 = einsum(equation = var_1951_equation_0, values = (var_1833_cast_fp16_18, var_1912_cast_fp16))[name = tensor<string, []>("op_1951_cast_fp16")];
+            tensor<string, []> var_1953_equation_0 = const()[name = tensor<string, []>("op_1953_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1953_cast_fp16 = einsum(equation = var_1953_equation_0, values = (var_1833_cast_fp16_19, var_1913_cast_fp16))[name = tensor<string, []>("op_1953_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1738, interleave = input_65_interleave_0, values = (var_1915_cast_fp16, var_1917_cast_fp16, var_1919_cast_fp16, var_1921_cast_fp16, var_1923_cast_fp16, var_1925_cast_fp16, var_1927_cast_fp16, var_1929_cast_fp16, var_1931_cast_fp16, var_1933_cast_fp16, var_1935_cast_fp16, var_1937_cast_fp16, var_1939_cast_fp16, var_1941_cast_fp16, var_1943_cast_fp16, var_1945_cast_fp16, var_1947_cast_fp16, var_1949_cast_fp16, var_1951_cast_fp16, var_1953_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1962_pad_type_0 = const()[name = tensor<string, []>("op_1962_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1962_strides_0 = const()[name = tensor<string, []>("op_1962_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1962_pad_0 = const()[name = tensor<string, []>("op_1962_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1962_dilations_0 = const()[name = tensor<string, []>("op_1962_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1962_groups_0 = const()[name = tensor<string, []>("op_1962_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260251072)))];
+            tensor<fp16, [1280]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263527936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1962_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1962_dilations_0, groups = var_1962_groups_0, pad = var_1962_pad_0, pad_type = var_1962_pad_type_0, strides = var_1962_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1962_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1962_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263530560)))];
+            tensor<fp16, [1280]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263533184)))];
+            tensor<fp16, []> var_1972_to_fp16 = const()[name = tensor<string, []>("op_1972_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1972_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263535808)))];
+            tensor<fp16, [5120]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(276643072)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1998_pad_type_0 = const()[name = tensor<string, []>("op_1998_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1998_strides_0 = const()[name = tensor<string, []>("op_1998_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1998_pad_0 = const()[name = tensor<string, []>("op_1998_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1998_dilations_0 = const()[name = tensor<string, []>("op_1998_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1998_groups_0 = const()[name = tensor<string, []>("op_1998_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(276653376)))];
+            tensor<fp16, [1280]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289760640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1998_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1998_dilations_0, groups = var_1998_groups_0, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1998_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1998_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_2007 = const()[name = tensor<string, []>("op_2007"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289763264)))];
+            tensor<fp16, [1280]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289765888)))];
+            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_2023_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2058_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2058_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289768512)))];
+            tensor<fp16, [1280]> var_2058_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2058_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293045376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2058_cast_fp16 = conv(bias = var_2058_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_2058_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2058_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293048000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_2056_pad_type_0 = const()[name = tensor<string, []>("op_2056_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2056_strides_0 = const()[name = tensor<string, []>("op_2056_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2056_pad_0 = const()[name = tensor<string, []>("op_2056_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2056_dilations_0 = const()[name = tensor<string, []>("op_2056_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2056_groups_0 = const()[name = tensor<string, []>("op_2056_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(296324864)))];
+            tensor<fp16, [1280]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299601728)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2056_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<int32, [20]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2059_axis_0 = const()[name = tensor<string, []>("op_2059_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_19 = split(axis = var_2059_axis_0, split_sizes = tile_21, x = var_2058_cast_fp16)[name = tensor<string, []>("op_2059_cast_fp16")];
+            tensor<int32, [4]> var_2080_perm_0 = const()[name = tensor<string, []>("op_2080_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2081_axis_0 = const()[name = tensor<string, []>("op_2081_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2080_cast_fp16 = transpose(perm = var_2080_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_25")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_19 = split(axis = var_2081_axis_0, split_sizes = tile_22, x = var_2080_cast_fp16)[name = tensor<string, []>("op_2081_cast_fp16")];
+            tensor<int32, [20]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2102_axis_0 = const()[name = tensor<string, []>("op_2102_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_19 = split(axis = var_2102_axis_0, split_sizes = tile_23, x = var_2056_cast_fp16)[name = tensor<string, []>("op_2102_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2081_cast_fp16_0, var_2059_cast_fp16_0))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2081_cast_fp16_1, var_2059_cast_fp16_1))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2081_cast_fp16_2, var_2059_cast_fp16_2))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_287_equation_0 = const()[name = tensor<string, []>("aw_287_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_287_cast_fp16 = einsum(equation = aw_287_equation_0, values = (var_2081_cast_fp16_3, var_2059_cast_fp16_3))[name = tensor<string, []>("aw_287_cast_fp16")];
+            tensor<string, []> aw_289_equation_0 = const()[name = tensor<string, []>("aw_289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_289_cast_fp16 = einsum(equation = aw_289_equation_0, values = (var_2081_cast_fp16_4, var_2059_cast_fp16_4))[name = tensor<string, []>("aw_289_cast_fp16")];
+            tensor<string, []> aw_291_equation_0 = const()[name = tensor<string, []>("aw_291_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_291_cast_fp16 = einsum(equation = aw_291_equation_0, values = (var_2081_cast_fp16_5, var_2059_cast_fp16_5))[name = tensor<string, []>("aw_291_cast_fp16")];
+            tensor<string, []> aw_293_equation_0 = const()[name = tensor<string, []>("aw_293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_293_cast_fp16 = einsum(equation = aw_293_equation_0, values = (var_2081_cast_fp16_6, var_2059_cast_fp16_6))[name = tensor<string, []>("aw_293_cast_fp16")];
+            tensor<string, []> aw_295_equation_0 = const()[name = tensor<string, []>("aw_295_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_295_cast_fp16 = einsum(equation = aw_295_equation_0, values = (var_2081_cast_fp16_7, var_2059_cast_fp16_7))[name = tensor<string, []>("aw_295_cast_fp16")];
+            tensor<string, []> aw_297_equation_0 = const()[name = tensor<string, []>("aw_297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_297_cast_fp16 = einsum(equation = aw_297_equation_0, values = (var_2081_cast_fp16_8, var_2059_cast_fp16_8))[name = tensor<string, []>("aw_297_cast_fp16")];
+            tensor<string, []> aw_299_equation_0 = const()[name = tensor<string, []>("aw_299_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_299_cast_fp16 = einsum(equation = aw_299_equation_0, values = (var_2081_cast_fp16_9, var_2059_cast_fp16_9))[name = tensor<string, []>("aw_299_cast_fp16")];
+            tensor<string, []> aw_301_equation_0 = const()[name = tensor<string, []>("aw_301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_301_cast_fp16 = einsum(equation = aw_301_equation_0, values = (var_2081_cast_fp16_10, var_2059_cast_fp16_10))[name = tensor<string, []>("aw_301_cast_fp16")];
+            tensor<string, []> aw_303_equation_0 = const()[name = tensor<string, []>("aw_303_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_303_cast_fp16 = einsum(equation = aw_303_equation_0, values = (var_2081_cast_fp16_11, var_2059_cast_fp16_11))[name = tensor<string, []>("aw_303_cast_fp16")];
+            tensor<string, []> aw_305_equation_0 = const()[name = tensor<string, []>("aw_305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_305_cast_fp16 = einsum(equation = aw_305_equation_0, values = (var_2081_cast_fp16_12, var_2059_cast_fp16_12))[name = tensor<string, []>("aw_305_cast_fp16")];
+            tensor<string, []> aw_307_equation_0 = const()[name = tensor<string, []>("aw_307_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_307_cast_fp16 = einsum(equation = aw_307_equation_0, values = (var_2081_cast_fp16_13, var_2059_cast_fp16_13))[name = tensor<string, []>("aw_307_cast_fp16")];
+            tensor<string, []> aw_309_equation_0 = const()[name = tensor<string, []>("aw_309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_309_cast_fp16 = einsum(equation = aw_309_equation_0, values = (var_2081_cast_fp16_14, var_2059_cast_fp16_14))[name = tensor<string, []>("aw_309_cast_fp16")];
+            tensor<string, []> aw_311_equation_0 = const()[name = tensor<string, []>("aw_311_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_311_cast_fp16 = einsum(equation = aw_311_equation_0, values = (var_2081_cast_fp16_15, var_2059_cast_fp16_15))[name = tensor<string, []>("aw_311_cast_fp16")];
+            tensor<string, []> aw_313_equation_0 = const()[name = tensor<string, []>("aw_313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_313_cast_fp16 = einsum(equation = aw_313_equation_0, values = (var_2081_cast_fp16_16, var_2059_cast_fp16_16))[name = tensor<string, []>("aw_313_cast_fp16")];
+            tensor<string, []> aw_315_equation_0 = const()[name = tensor<string, []>("aw_315_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_315_cast_fp16 = einsum(equation = aw_315_equation_0, values = (var_2081_cast_fp16_17, var_2059_cast_fp16_17))[name = tensor<string, []>("aw_315_cast_fp16")];
+            tensor<string, []> aw_317_equation_0 = const()[name = tensor<string, []>("aw_317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_317_cast_fp16 = einsum(equation = aw_317_equation_0, values = (var_2081_cast_fp16_18, var_2059_cast_fp16_18))[name = tensor<string, []>("aw_317_cast_fp16")];
+            tensor<string, []> aw_319_equation_0 = const()[name = tensor<string, []>("aw_319_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_319_cast_fp16 = einsum(equation = aw_319_equation_0, values = (var_2081_cast_fp16_19, var_2059_cast_fp16_19))[name = tensor<string, []>("aw_319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2163_cast_fp16 = softmax(axis = var_2007, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2163_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2164_cast_fp16 = softmax(axis = var_2007, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2165_cast_fp16 = softmax(axis = var_2007, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2165_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2166_cast_fp16 = softmax(axis = var_2007, x = aw_287_cast_fp16)[name = tensor<string, []>("op_2166_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2167_cast_fp16 = softmax(axis = var_2007, x = aw_289_cast_fp16)[name = tensor<string, []>("op_2167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2168_cast_fp16 = softmax(axis = var_2007, x = aw_291_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2169_cast_fp16 = softmax(axis = var_2007, x = aw_293_cast_fp16)[name = tensor<string, []>("op_2169_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2170_cast_fp16 = softmax(axis = var_2007, x = aw_295_cast_fp16)[name = tensor<string, []>("op_2170_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2171_cast_fp16 = softmax(axis = var_2007, x = aw_297_cast_fp16)[name = tensor<string, []>("op_2171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2172_cast_fp16 = softmax(axis = var_2007, x = aw_299_cast_fp16)[name = tensor<string, []>("op_2172_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2173_cast_fp16 = softmax(axis = var_2007, x = aw_301_cast_fp16)[name = tensor<string, []>("op_2173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2174_cast_fp16 = softmax(axis = var_2007, x = aw_303_cast_fp16)[name = tensor<string, []>("op_2174_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2175_cast_fp16 = softmax(axis = var_2007, x = aw_305_cast_fp16)[name = tensor<string, []>("op_2175_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2176_cast_fp16 = softmax(axis = var_2007, x = aw_307_cast_fp16)[name = tensor<string, []>("op_2176_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2177_cast_fp16 = softmax(axis = var_2007, x = aw_309_cast_fp16)[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2178_cast_fp16 = softmax(axis = var_2007, x = aw_311_cast_fp16)[name = tensor<string, []>("op_2178_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2179_cast_fp16 = softmax(axis = var_2007, x = aw_313_cast_fp16)[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2180_cast_fp16 = softmax(axis = var_2007, x = aw_315_cast_fp16)[name = tensor<string, []>("op_2180_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2181_cast_fp16 = softmax(axis = var_2007, x = aw_317_cast_fp16)[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2182_cast_fp16 = softmax(axis = var_2007, x = aw_319_cast_fp16)[name = tensor<string, []>("op_2182_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2184_cast_fp16 = einsum(equation = var_2184_equation_0, values = (var_2102_cast_fp16_0, var_2163_cast_fp16))[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<string, []> var_2186_equation_0 = const()[name = tensor<string, []>("op_2186_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16 = einsum(equation = var_2186_equation_0, values = (var_2102_cast_fp16_1, var_2164_cast_fp16))[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<string, []> var_2188_equation_0 = const()[name = tensor<string, []>("op_2188_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2188_cast_fp16 = einsum(equation = var_2188_equation_0, values = (var_2102_cast_fp16_2, var_2165_cast_fp16))[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<string, []> var_2190_equation_0 = const()[name = tensor<string, []>("op_2190_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2190_cast_fp16 = einsum(equation = var_2190_equation_0, values = (var_2102_cast_fp16_3, var_2166_cast_fp16))[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<string, []> var_2192_equation_0 = const()[name = tensor<string, []>("op_2192_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2192_cast_fp16 = einsum(equation = var_2192_equation_0, values = (var_2102_cast_fp16_4, var_2167_cast_fp16))[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<string, []> var_2194_equation_0 = const()[name = tensor<string, []>("op_2194_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2194_cast_fp16 = einsum(equation = var_2194_equation_0, values = (var_2102_cast_fp16_5, var_2168_cast_fp16))[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<string, []> var_2196_equation_0 = const()[name = tensor<string, []>("op_2196_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2196_cast_fp16 = einsum(equation = var_2196_equation_0, values = (var_2102_cast_fp16_6, var_2169_cast_fp16))[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<string, []> var_2198_equation_0 = const()[name = tensor<string, []>("op_2198_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2198_cast_fp16 = einsum(equation = var_2198_equation_0, values = (var_2102_cast_fp16_7, var_2170_cast_fp16))[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<string, []> var_2200_equation_0 = const()[name = tensor<string, []>("op_2200_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2200_cast_fp16 = einsum(equation = var_2200_equation_0, values = (var_2102_cast_fp16_8, var_2171_cast_fp16))[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<string, []> var_2202_equation_0 = const()[name = tensor<string, []>("op_2202_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2202_cast_fp16 = einsum(equation = var_2202_equation_0, values = (var_2102_cast_fp16_9, var_2172_cast_fp16))[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<string, []> var_2204_equation_0 = const()[name = tensor<string, []>("op_2204_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2204_cast_fp16 = einsum(equation = var_2204_equation_0, values = (var_2102_cast_fp16_10, var_2173_cast_fp16))[name = tensor<string, []>("op_2204_cast_fp16")];
+            tensor<string, []> var_2206_equation_0 = const()[name = tensor<string, []>("op_2206_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2206_cast_fp16 = einsum(equation = var_2206_equation_0, values = (var_2102_cast_fp16_11, var_2174_cast_fp16))[name = tensor<string, []>("op_2206_cast_fp16")];
+            tensor<string, []> var_2208_equation_0 = const()[name = tensor<string, []>("op_2208_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2208_cast_fp16 = einsum(equation = var_2208_equation_0, values = (var_2102_cast_fp16_12, var_2175_cast_fp16))[name = tensor<string, []>("op_2208_cast_fp16")];
+            tensor<string, []> var_2210_equation_0 = const()[name = tensor<string, []>("op_2210_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2210_cast_fp16 = einsum(equation = var_2210_equation_0, values = (var_2102_cast_fp16_13, var_2176_cast_fp16))[name = tensor<string, []>("op_2210_cast_fp16")];
+            tensor<string, []> var_2212_equation_0 = const()[name = tensor<string, []>("op_2212_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2102_cast_fp16_14, var_2177_cast_fp16))[name = tensor<string, []>("op_2212_cast_fp16")];
+            tensor<string, []> var_2214_equation_0 = const()[name = tensor<string, []>("op_2214_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2102_cast_fp16_15, var_2178_cast_fp16))[name = tensor<string, []>("op_2214_cast_fp16")];
+            tensor<string, []> var_2216_equation_0 = const()[name = tensor<string, []>("op_2216_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2102_cast_fp16_16, var_2179_cast_fp16))[name = tensor<string, []>("op_2216_cast_fp16")];
+            tensor<string, []> var_2218_equation_0 = const()[name = tensor<string, []>("op_2218_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2102_cast_fp16_17, var_2180_cast_fp16))[name = tensor<string, []>("op_2218_cast_fp16")];
+            tensor<string, []> var_2220_equation_0 = const()[name = tensor<string, []>("op_2220_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2102_cast_fp16_18, var_2181_cast_fp16))[name = tensor<string, []>("op_2220_cast_fp16")];
+            tensor<string, []> var_2222_equation_0 = const()[name = tensor<string, []>("op_2222_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2102_cast_fp16_19, var_2182_cast_fp16))[name = tensor<string, []>("op_2222_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_75_cast_fp16 = concat(axis = var_2007, interleave = input_75_interleave_0, values = (var_2184_cast_fp16, var_2186_cast_fp16, var_2188_cast_fp16, var_2190_cast_fp16, var_2192_cast_fp16, var_2194_cast_fp16, var_2196_cast_fp16, var_2198_cast_fp16, var_2200_cast_fp16, var_2202_cast_fp16, var_2204_cast_fp16, var_2206_cast_fp16, var_2208_cast_fp16, var_2210_cast_fp16, var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_2231_pad_type_0 = const()[name = tensor<string, []>("op_2231_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2231_strides_0 = const()[name = tensor<string, []>("op_2231_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2231_pad_0 = const()[name = tensor<string, []>("op_2231_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2231_dilations_0 = const()[name = tensor<string, []>("op_2231_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2231_groups_0 = const()[name = tensor<string, []>("op_2231_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299604352)))];
+            tensor<fp16, [1280]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302881216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2231_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_2231_dilations_0, groups = var_2231_groups_0, pad = var_2231_pad_0, pad_type = var_2231_pad_type_0, strides = var_2231_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_2231_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_2231_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302883840)))];
+            tensor<fp16, [1280]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302886464)))];
+            tensor<fp16, []> var_2241_to_fp16 = const()[name = tensor<string, []>("op_2241_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_2241_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302889088)))];
+            tensor<fp16, [5120]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(315996352)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_2267_pad_type_0 = const()[name = tensor<string, []>("op_2267_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2267_strides_0 = const()[name = tensor<string, []>("op_2267_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2267_pad_0 = const()[name = tensor<string, []>("op_2267_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2267_dilations_0 = const()[name = tensor<string, []>("op_2267_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2267_groups_0 = const()[name = tensor<string, []>("op_2267_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316006656)))];
+            tensor<fp16, [1280]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329113920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2267_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_2267_dilations_0, groups = var_2267_groups_0, pad = var_2267_pad_0, pad_type = var_2267_pad_type_0, strides = var_2267_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_2267_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_2267_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_2276 = const()[name = tensor<string, []>("op_2276"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329116544)))];
+            tensor<fp16, [1280]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329119168)))];
+            tensor<fp16, []> var_2292_to_fp16 = const()[name = tensor<string, []>("op_2292_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_2292_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2327_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2327_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329121792)))];
+            tensor<fp16, [1280]> var_2327_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2327_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332398656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2327_cast_fp16 = conv(bias = var_2327_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_2327_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2327_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332401280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_2325_pad_type_0 = const()[name = tensor<string, []>("op_2325_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2325_strides_0 = const()[name = tensor<string, []>("op_2325_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2325_pad_0 = const()[name = tensor<string, []>("op_2325_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2325_dilations_0 = const()[name = tensor<string, []>("op_2325_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2325_groups_0 = const()[name = tensor<string, []>("op_2325_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(335678144)))];
+            tensor<fp16, [1280]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(338955008)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2325_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_2325_dilations_0, groups = var_2325_groups_0, pad = var_2325_pad_0, pad_type = var_2325_pad_type_0, strides = var_2325_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2325_cast_fp16")];
+            tensor<int32, [20]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2328_axis_0 = const()[name = tensor<string, []>("op_2328_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_19 = split(axis = var_2328_axis_0, split_sizes = tile_24, x = var_2327_cast_fp16)[name = tensor<string, []>("op_2328_cast_fp16")];
+            tensor<int32, [4]> var_2349_perm_0 = const()[name = tensor<string, []>("op_2349_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2350_axis_0 = const()[name = tensor<string, []>("op_2350_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2349_cast_fp16 = transpose(perm = var_2349_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_24")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_19 = split(axis = var_2350_axis_0, split_sizes = tile_25, x = var_2349_cast_fp16)[name = tensor<string, []>("op_2350_cast_fp16")];
+            tensor<int32, [20]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2371_axis_0 = const()[name = tensor<string, []>("op_2371_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_19 = split(axis = var_2371_axis_0, split_sizes = tile_26, x = var_2325_cast_fp16)[name = tensor<string, []>("op_2371_cast_fp16")];
+            tensor<string, []> aw_321_equation_0 = const()[name = tensor<string, []>("aw_321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_321_cast_fp16 = einsum(equation = aw_321_equation_0, values = (var_2350_cast_fp16_0, var_2328_cast_fp16_0))[name = tensor<string, []>("aw_321_cast_fp16")];
+            tensor<string, []> aw_323_equation_0 = const()[name = tensor<string, []>("aw_323_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_323_cast_fp16 = einsum(equation = aw_323_equation_0, values = (var_2350_cast_fp16_1, var_2328_cast_fp16_1))[name = tensor<string, []>("aw_323_cast_fp16")];
+            tensor<string, []> aw_325_equation_0 = const()[name = tensor<string, []>("aw_325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_325_cast_fp16 = einsum(equation = aw_325_equation_0, values = (var_2350_cast_fp16_2, var_2328_cast_fp16_2))[name = tensor<string, []>("aw_325_cast_fp16")];
+            tensor<string, []> aw_327_equation_0 = const()[name = tensor<string, []>("aw_327_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_327_cast_fp16 = einsum(equation = aw_327_equation_0, values = (var_2350_cast_fp16_3, var_2328_cast_fp16_3))[name = tensor<string, []>("aw_327_cast_fp16")];
+            tensor<string, []> aw_329_equation_0 = const()[name = tensor<string, []>("aw_329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_329_cast_fp16 = einsum(equation = aw_329_equation_0, values = (var_2350_cast_fp16_4, var_2328_cast_fp16_4))[name = tensor<string, []>("aw_329_cast_fp16")];
+            tensor<string, []> aw_331_equation_0 = const()[name = tensor<string, []>("aw_331_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_331_cast_fp16 = einsum(equation = aw_331_equation_0, values = (var_2350_cast_fp16_5, var_2328_cast_fp16_5))[name = tensor<string, []>("aw_331_cast_fp16")];
+            tensor<string, []> aw_333_equation_0 = const()[name = tensor<string, []>("aw_333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_333_cast_fp16 = einsum(equation = aw_333_equation_0, values = (var_2350_cast_fp16_6, var_2328_cast_fp16_6))[name = tensor<string, []>("aw_333_cast_fp16")];
+            tensor<string, []> aw_335_equation_0 = const()[name = tensor<string, []>("aw_335_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_335_cast_fp16 = einsum(equation = aw_335_equation_0, values = (var_2350_cast_fp16_7, var_2328_cast_fp16_7))[name = tensor<string, []>("aw_335_cast_fp16")];
+            tensor<string, []> aw_337_equation_0 = const()[name = tensor<string, []>("aw_337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_337_cast_fp16 = einsum(equation = aw_337_equation_0, values = (var_2350_cast_fp16_8, var_2328_cast_fp16_8))[name = tensor<string, []>("aw_337_cast_fp16")];
+            tensor<string, []> aw_339_equation_0 = const()[name = tensor<string, []>("aw_339_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_339_cast_fp16 = einsum(equation = aw_339_equation_0, values = (var_2350_cast_fp16_9, var_2328_cast_fp16_9))[name = tensor<string, []>("aw_339_cast_fp16")];
+            tensor<string, []> aw_341_equation_0 = const()[name = tensor<string, []>("aw_341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_341_cast_fp16 = einsum(equation = aw_341_equation_0, values = (var_2350_cast_fp16_10, var_2328_cast_fp16_10))[name = tensor<string, []>("aw_341_cast_fp16")];
+            tensor<string, []> aw_343_equation_0 = const()[name = tensor<string, []>("aw_343_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_343_cast_fp16 = einsum(equation = aw_343_equation_0, values = (var_2350_cast_fp16_11, var_2328_cast_fp16_11))[name = tensor<string, []>("aw_343_cast_fp16")];
+            tensor<string, []> aw_345_equation_0 = const()[name = tensor<string, []>("aw_345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_345_cast_fp16 = einsum(equation = aw_345_equation_0, values = (var_2350_cast_fp16_12, var_2328_cast_fp16_12))[name = tensor<string, []>("aw_345_cast_fp16")];
+            tensor<string, []> aw_347_equation_0 = const()[name = tensor<string, []>("aw_347_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_347_cast_fp16 = einsum(equation = aw_347_equation_0, values = (var_2350_cast_fp16_13, var_2328_cast_fp16_13))[name = tensor<string, []>("aw_347_cast_fp16")];
+            tensor<string, []> aw_349_equation_0 = const()[name = tensor<string, []>("aw_349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_349_cast_fp16 = einsum(equation = aw_349_equation_0, values = (var_2350_cast_fp16_14, var_2328_cast_fp16_14))[name = tensor<string, []>("aw_349_cast_fp16")];
+            tensor<string, []> aw_351_equation_0 = const()[name = tensor<string, []>("aw_351_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_351_cast_fp16 = einsum(equation = aw_351_equation_0, values = (var_2350_cast_fp16_15, var_2328_cast_fp16_15))[name = tensor<string, []>("aw_351_cast_fp16")];
+            tensor<string, []> aw_353_equation_0 = const()[name = tensor<string, []>("aw_353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_353_cast_fp16 = einsum(equation = aw_353_equation_0, values = (var_2350_cast_fp16_16, var_2328_cast_fp16_16))[name = tensor<string, []>("aw_353_cast_fp16")];
+            tensor<string, []> aw_355_equation_0 = const()[name = tensor<string, []>("aw_355_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_355_cast_fp16 = einsum(equation = aw_355_equation_0, values = (var_2350_cast_fp16_17, var_2328_cast_fp16_17))[name = tensor<string, []>("aw_355_cast_fp16")];
+            tensor<string, []> aw_357_equation_0 = const()[name = tensor<string, []>("aw_357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_357_cast_fp16 = einsum(equation = aw_357_equation_0, values = (var_2350_cast_fp16_18, var_2328_cast_fp16_18))[name = tensor<string, []>("aw_357_cast_fp16")];
+            tensor<string, []> aw_359_equation_0 = const()[name = tensor<string, []>("aw_359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_359_cast_fp16 = einsum(equation = aw_359_equation_0, values = (var_2350_cast_fp16_19, var_2328_cast_fp16_19))[name = tensor<string, []>("aw_359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2432_cast_fp16 = softmax(axis = var_2276, x = aw_321_cast_fp16)[name = tensor<string, []>("op_2432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2433_cast_fp16 = softmax(axis = var_2276, x = aw_323_cast_fp16)[name = tensor<string, []>("op_2433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2434_cast_fp16 = softmax(axis = var_2276, x = aw_325_cast_fp16)[name = tensor<string, []>("op_2434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2435_cast_fp16 = softmax(axis = var_2276, x = aw_327_cast_fp16)[name = tensor<string, []>("op_2435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2436_cast_fp16 = softmax(axis = var_2276, x = aw_329_cast_fp16)[name = tensor<string, []>("op_2436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2437_cast_fp16 = softmax(axis = var_2276, x = aw_331_cast_fp16)[name = tensor<string, []>("op_2437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2438_cast_fp16 = softmax(axis = var_2276, x = aw_333_cast_fp16)[name = tensor<string, []>("op_2438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2439_cast_fp16 = softmax(axis = var_2276, x = aw_335_cast_fp16)[name = tensor<string, []>("op_2439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2440_cast_fp16 = softmax(axis = var_2276, x = aw_337_cast_fp16)[name = tensor<string, []>("op_2440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2441_cast_fp16 = softmax(axis = var_2276, x = aw_339_cast_fp16)[name = tensor<string, []>("op_2441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2442_cast_fp16 = softmax(axis = var_2276, x = aw_341_cast_fp16)[name = tensor<string, []>("op_2442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2443_cast_fp16 = softmax(axis = var_2276, x = aw_343_cast_fp16)[name = tensor<string, []>("op_2443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2444_cast_fp16 = softmax(axis = var_2276, x = aw_345_cast_fp16)[name = tensor<string, []>("op_2444_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2445_cast_fp16 = softmax(axis = var_2276, x = aw_347_cast_fp16)[name = tensor<string, []>("op_2445_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2446_cast_fp16 = softmax(axis = var_2276, x = aw_349_cast_fp16)[name = tensor<string, []>("op_2446_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2447_cast_fp16 = softmax(axis = var_2276, x = aw_351_cast_fp16)[name = tensor<string, []>("op_2447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2448_cast_fp16 = softmax(axis = var_2276, x = aw_353_cast_fp16)[name = tensor<string, []>("op_2448_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2449_cast_fp16 = softmax(axis = var_2276, x = aw_355_cast_fp16)[name = tensor<string, []>("op_2449_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2450_cast_fp16 = softmax(axis = var_2276, x = aw_357_cast_fp16)[name = tensor<string, []>("op_2450_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2451_cast_fp16 = softmax(axis = var_2276, x = aw_359_cast_fp16)[name = tensor<string, []>("op_2451_cast_fp16")];
+            tensor<string, []> var_2453_equation_0 = const()[name = tensor<string, []>("op_2453_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2453_cast_fp16 = einsum(equation = var_2453_equation_0, values = (var_2371_cast_fp16_0, var_2432_cast_fp16))[name = tensor<string, []>("op_2453_cast_fp16")];
+            tensor<string, []> var_2455_equation_0 = const()[name = tensor<string, []>("op_2455_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2455_cast_fp16 = einsum(equation = var_2455_equation_0, values = (var_2371_cast_fp16_1, var_2433_cast_fp16))[name = tensor<string, []>("op_2455_cast_fp16")];
+            tensor<string, []> var_2457_equation_0 = const()[name = tensor<string, []>("op_2457_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2457_cast_fp16 = einsum(equation = var_2457_equation_0, values = (var_2371_cast_fp16_2, var_2434_cast_fp16))[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<string, []> var_2459_equation_0 = const()[name = tensor<string, []>("op_2459_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2459_cast_fp16 = einsum(equation = var_2459_equation_0, values = (var_2371_cast_fp16_3, var_2435_cast_fp16))[name = tensor<string, []>("op_2459_cast_fp16")];
+            tensor<string, []> var_2461_equation_0 = const()[name = tensor<string, []>("op_2461_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2461_cast_fp16 = einsum(equation = var_2461_equation_0, values = (var_2371_cast_fp16_4, var_2436_cast_fp16))[name = tensor<string, []>("op_2461_cast_fp16")];
+            tensor<string, []> var_2463_equation_0 = const()[name = tensor<string, []>("op_2463_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2463_cast_fp16 = einsum(equation = var_2463_equation_0, values = (var_2371_cast_fp16_5, var_2437_cast_fp16))[name = tensor<string, []>("op_2463_cast_fp16")];
+            tensor<string, []> var_2465_equation_0 = const()[name = tensor<string, []>("op_2465_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2465_cast_fp16 = einsum(equation = var_2465_equation_0, values = (var_2371_cast_fp16_6, var_2438_cast_fp16))[name = tensor<string, []>("op_2465_cast_fp16")];
+            tensor<string, []> var_2467_equation_0 = const()[name = tensor<string, []>("op_2467_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2467_cast_fp16 = einsum(equation = var_2467_equation_0, values = (var_2371_cast_fp16_7, var_2439_cast_fp16))[name = tensor<string, []>("op_2467_cast_fp16")];
+            tensor<string, []> var_2469_equation_0 = const()[name = tensor<string, []>("op_2469_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2469_cast_fp16 = einsum(equation = var_2469_equation_0, values = (var_2371_cast_fp16_8, var_2440_cast_fp16))[name = tensor<string, []>("op_2469_cast_fp16")];
+            tensor<string, []> var_2471_equation_0 = const()[name = tensor<string, []>("op_2471_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2471_cast_fp16 = einsum(equation = var_2471_equation_0, values = (var_2371_cast_fp16_9, var_2441_cast_fp16))[name = tensor<string, []>("op_2471_cast_fp16")];
+            tensor<string, []> var_2473_equation_0 = const()[name = tensor<string, []>("op_2473_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2473_cast_fp16 = einsum(equation = var_2473_equation_0, values = (var_2371_cast_fp16_10, var_2442_cast_fp16))[name = tensor<string, []>("op_2473_cast_fp16")];
+            tensor<string, []> var_2475_equation_0 = const()[name = tensor<string, []>("op_2475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2475_cast_fp16 = einsum(equation = var_2475_equation_0, values = (var_2371_cast_fp16_11, var_2443_cast_fp16))[name = tensor<string, []>("op_2475_cast_fp16")];
+            tensor<string, []> var_2477_equation_0 = const()[name = tensor<string, []>("op_2477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2477_cast_fp16 = einsum(equation = var_2477_equation_0, values = (var_2371_cast_fp16_12, var_2444_cast_fp16))[name = tensor<string, []>("op_2477_cast_fp16")];
+            tensor<string, []> var_2479_equation_0 = const()[name = tensor<string, []>("op_2479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2479_cast_fp16 = einsum(equation = var_2479_equation_0, values = (var_2371_cast_fp16_13, var_2445_cast_fp16))[name = tensor<string, []>("op_2479_cast_fp16")];
+            tensor<string, []> var_2481_equation_0 = const()[name = tensor<string, []>("op_2481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2481_cast_fp16 = einsum(equation = var_2481_equation_0, values = (var_2371_cast_fp16_14, var_2446_cast_fp16))[name = tensor<string, []>("op_2481_cast_fp16")];
+            tensor<string, []> var_2483_equation_0 = const()[name = tensor<string, []>("op_2483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2483_cast_fp16 = einsum(equation = var_2483_equation_0, values = (var_2371_cast_fp16_15, var_2447_cast_fp16))[name = tensor<string, []>("op_2483_cast_fp16")];
+            tensor<string, []> var_2485_equation_0 = const()[name = tensor<string, []>("op_2485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2485_cast_fp16 = einsum(equation = var_2485_equation_0, values = (var_2371_cast_fp16_16, var_2448_cast_fp16))[name = tensor<string, []>("op_2485_cast_fp16")];
+            tensor<string, []> var_2487_equation_0 = const()[name = tensor<string, []>("op_2487_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2487_cast_fp16 = einsum(equation = var_2487_equation_0, values = (var_2371_cast_fp16_17, var_2449_cast_fp16))[name = tensor<string, []>("op_2487_cast_fp16")];
+            tensor<string, []> var_2489_equation_0 = const()[name = tensor<string, []>("op_2489_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2489_cast_fp16 = einsum(equation = var_2489_equation_0, values = (var_2371_cast_fp16_18, var_2450_cast_fp16))[name = tensor<string, []>("op_2489_cast_fp16")];
+            tensor<string, []> var_2491_equation_0 = const()[name = tensor<string, []>("op_2491_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2491_cast_fp16 = einsum(equation = var_2491_equation_0, values = (var_2371_cast_fp16_19, var_2451_cast_fp16))[name = tensor<string, []>("op_2491_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_85_cast_fp16 = concat(axis = var_2276, interleave = input_85_interleave_0, values = (var_2453_cast_fp16, var_2455_cast_fp16, var_2457_cast_fp16, var_2459_cast_fp16, var_2461_cast_fp16, var_2463_cast_fp16, var_2465_cast_fp16, var_2467_cast_fp16, var_2469_cast_fp16, var_2471_cast_fp16, var_2473_cast_fp16, var_2475_cast_fp16, var_2477_cast_fp16, var_2479_cast_fp16, var_2481_cast_fp16, var_2483_cast_fp16, var_2485_cast_fp16, var_2487_cast_fp16, var_2489_cast_fp16, var_2491_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_2500_pad_type_0 = const()[name = tensor<string, []>("op_2500_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2500_strides_0 = const()[name = tensor<string, []>("op_2500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2500_pad_0 = const()[name = tensor<string, []>("op_2500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2500_dilations_0 = const()[name = tensor<string, []>("op_2500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2500_groups_0 = const()[name = tensor<string, []>("op_2500_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(338957632)))];
+            tensor<fp16, [1280]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342234496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2500_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_2500_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_2500_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342237120)))];
+            tensor<fp16, [1280]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342239744)))];
+            tensor<fp16, []> var_2510_to_fp16 = const()[name = tensor<string, []>("op_2510_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_2510_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342242368)))];
+            tensor<fp16, [5120]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355349632)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_2536_pad_type_0 = const()[name = tensor<string, []>("op_2536_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2536_strides_0 = const()[name = tensor<string, []>("op_2536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2536_pad_0 = const()[name = tensor<string, []>("op_2536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2536_dilations_0 = const()[name = tensor<string, []>("op_2536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2536_groups_0 = const()[name = tensor<string, []>("op_2536_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355359936)))];
+            tensor<fp16, [1280]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368467200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2536_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_2536_dilations_0, groups = var_2536_groups_0, pad = var_2536_pad_0, pad_type = var_2536_pad_type_0, strides = var_2536_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_2536_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_2536_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_2545 = const()[name = tensor<string, []>("op_2545"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368469824)))];
+            tensor<fp16, [1280]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368472448)))];
+            tensor<fp16, []> var_2561_to_fp16 = const()[name = tensor<string, []>("op_2561_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_2561_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2596_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2596_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368475072)))];
+            tensor<fp16, [1280]> var_2596_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2596_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(371751936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2596_cast_fp16 = conv(bias = var_2596_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_2596_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2596_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(371754560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_2594_pad_type_0 = const()[name = tensor<string, []>("op_2594_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2594_strides_0 = const()[name = tensor<string, []>("op_2594_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2594_pad_0 = const()[name = tensor<string, []>("op_2594_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2594_dilations_0 = const()[name = tensor<string, []>("op_2594_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2594_groups_0 = const()[name = tensor<string, []>("op_2594_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(375031424)))];
+            tensor<fp16, [1280]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378308288)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2594_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_2594_dilations_0, groups = var_2594_groups_0, pad = var_2594_pad_0, pad_type = var_2594_pad_type_0, strides = var_2594_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2594_cast_fp16")];
+            tensor<int32, [20]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2597_axis_0 = const()[name = tensor<string, []>("op_2597_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_19 = split(axis = var_2597_axis_0, split_sizes = tile_27, x = var_2596_cast_fp16)[name = tensor<string, []>("op_2597_cast_fp16")];
+            tensor<int32, [4]> var_2618_perm_0 = const()[name = tensor<string, []>("op_2618_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2619_axis_0 = const()[name = tensor<string, []>("op_2619_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2618_cast_fp16 = transpose(perm = var_2618_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_23")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_19 = split(axis = var_2619_axis_0, split_sizes = tile_28, x = var_2618_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<int32, [20]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2640_axis_0 = const()[name = tensor<string, []>("op_2640_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_19 = split(axis = var_2640_axis_0, split_sizes = tile_29, x = var_2594_cast_fp16)[name = tensor<string, []>("op_2640_cast_fp16")];
+            tensor<string, []> aw_361_equation_0 = const()[name = tensor<string, []>("aw_361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_361_cast_fp16 = einsum(equation = aw_361_equation_0, values = (var_2619_cast_fp16_0, var_2597_cast_fp16_0))[name = tensor<string, []>("aw_361_cast_fp16")];
+            tensor<string, []> aw_363_equation_0 = const()[name = tensor<string, []>("aw_363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_363_cast_fp16 = einsum(equation = aw_363_equation_0, values = (var_2619_cast_fp16_1, var_2597_cast_fp16_1))[name = tensor<string, []>("aw_363_cast_fp16")];
+            tensor<string, []> aw_365_equation_0 = const()[name = tensor<string, []>("aw_365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_365_cast_fp16 = einsum(equation = aw_365_equation_0, values = (var_2619_cast_fp16_2, var_2597_cast_fp16_2))[name = tensor<string, []>("aw_365_cast_fp16")];
+            tensor<string, []> aw_367_equation_0 = const()[name = tensor<string, []>("aw_367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_367_cast_fp16 = einsum(equation = aw_367_equation_0, values = (var_2619_cast_fp16_3, var_2597_cast_fp16_3))[name = tensor<string, []>("aw_367_cast_fp16")];
+            tensor<string, []> aw_369_equation_0 = const()[name = tensor<string, []>("aw_369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_369_cast_fp16 = einsum(equation = aw_369_equation_0, values = (var_2619_cast_fp16_4, var_2597_cast_fp16_4))[name = tensor<string, []>("aw_369_cast_fp16")];
+            tensor<string, []> aw_371_equation_0 = const()[name = tensor<string, []>("aw_371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_371_cast_fp16 = einsum(equation = aw_371_equation_0, values = (var_2619_cast_fp16_5, var_2597_cast_fp16_5))[name = tensor<string, []>("aw_371_cast_fp16")];
+            tensor<string, []> aw_373_equation_0 = const()[name = tensor<string, []>("aw_373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_373_cast_fp16 = einsum(equation = aw_373_equation_0, values = (var_2619_cast_fp16_6, var_2597_cast_fp16_6))[name = tensor<string, []>("aw_373_cast_fp16")];
+            tensor<string, []> aw_375_equation_0 = const()[name = tensor<string, []>("aw_375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_375_cast_fp16 = einsum(equation = aw_375_equation_0, values = (var_2619_cast_fp16_7, var_2597_cast_fp16_7))[name = tensor<string, []>("aw_375_cast_fp16")];
+            tensor<string, []> aw_377_equation_0 = const()[name = tensor<string, []>("aw_377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_377_cast_fp16 = einsum(equation = aw_377_equation_0, values = (var_2619_cast_fp16_8, var_2597_cast_fp16_8))[name = tensor<string, []>("aw_377_cast_fp16")];
+            tensor<string, []> aw_379_equation_0 = const()[name = tensor<string, []>("aw_379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_379_cast_fp16 = einsum(equation = aw_379_equation_0, values = (var_2619_cast_fp16_9, var_2597_cast_fp16_9))[name = tensor<string, []>("aw_379_cast_fp16")];
+            tensor<string, []> aw_381_equation_0 = const()[name = tensor<string, []>("aw_381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_381_cast_fp16 = einsum(equation = aw_381_equation_0, values = (var_2619_cast_fp16_10, var_2597_cast_fp16_10))[name = tensor<string, []>("aw_381_cast_fp16")];
+            tensor<string, []> aw_383_equation_0 = const()[name = tensor<string, []>("aw_383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_383_cast_fp16 = einsum(equation = aw_383_equation_0, values = (var_2619_cast_fp16_11, var_2597_cast_fp16_11))[name = tensor<string, []>("aw_383_cast_fp16")];
+            tensor<string, []> aw_385_equation_0 = const()[name = tensor<string, []>("aw_385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_385_cast_fp16 = einsum(equation = aw_385_equation_0, values = (var_2619_cast_fp16_12, var_2597_cast_fp16_12))[name = tensor<string, []>("aw_385_cast_fp16")];
+            tensor<string, []> aw_387_equation_0 = const()[name = tensor<string, []>("aw_387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_387_cast_fp16 = einsum(equation = aw_387_equation_0, values = (var_2619_cast_fp16_13, var_2597_cast_fp16_13))[name = tensor<string, []>("aw_387_cast_fp16")];
+            tensor<string, []> aw_389_equation_0 = const()[name = tensor<string, []>("aw_389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_389_cast_fp16 = einsum(equation = aw_389_equation_0, values = (var_2619_cast_fp16_14, var_2597_cast_fp16_14))[name = tensor<string, []>("aw_389_cast_fp16")];
+            tensor<string, []> aw_391_equation_0 = const()[name = tensor<string, []>("aw_391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_391_cast_fp16 = einsum(equation = aw_391_equation_0, values = (var_2619_cast_fp16_15, var_2597_cast_fp16_15))[name = tensor<string, []>("aw_391_cast_fp16")];
+            tensor<string, []> aw_393_equation_0 = const()[name = tensor<string, []>("aw_393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_393_cast_fp16 = einsum(equation = aw_393_equation_0, values = (var_2619_cast_fp16_16, var_2597_cast_fp16_16))[name = tensor<string, []>("aw_393_cast_fp16")];
+            tensor<string, []> aw_395_equation_0 = const()[name = tensor<string, []>("aw_395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_395_cast_fp16 = einsum(equation = aw_395_equation_0, values = (var_2619_cast_fp16_17, var_2597_cast_fp16_17))[name = tensor<string, []>("aw_395_cast_fp16")];
+            tensor<string, []> aw_397_equation_0 = const()[name = tensor<string, []>("aw_397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_397_cast_fp16 = einsum(equation = aw_397_equation_0, values = (var_2619_cast_fp16_18, var_2597_cast_fp16_18))[name = tensor<string, []>("aw_397_cast_fp16")];
+            tensor<string, []> aw_399_equation_0 = const()[name = tensor<string, []>("aw_399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_399_cast_fp16 = einsum(equation = aw_399_equation_0, values = (var_2619_cast_fp16_19, var_2597_cast_fp16_19))[name = tensor<string, []>("aw_399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2701_cast_fp16 = softmax(axis = var_2545, x = aw_361_cast_fp16)[name = tensor<string, []>("op_2701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2702_cast_fp16 = softmax(axis = var_2545, x = aw_363_cast_fp16)[name = tensor<string, []>("op_2702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2703_cast_fp16 = softmax(axis = var_2545, x = aw_365_cast_fp16)[name = tensor<string, []>("op_2703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2704_cast_fp16 = softmax(axis = var_2545, x = aw_367_cast_fp16)[name = tensor<string, []>("op_2704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2705_cast_fp16 = softmax(axis = var_2545, x = aw_369_cast_fp16)[name = tensor<string, []>("op_2705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2706_cast_fp16 = softmax(axis = var_2545, x = aw_371_cast_fp16)[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2707_cast_fp16 = softmax(axis = var_2545, x = aw_373_cast_fp16)[name = tensor<string, []>("op_2707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2708_cast_fp16 = softmax(axis = var_2545, x = aw_375_cast_fp16)[name = tensor<string, []>("op_2708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2709_cast_fp16 = softmax(axis = var_2545, x = aw_377_cast_fp16)[name = tensor<string, []>("op_2709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2710_cast_fp16 = softmax(axis = var_2545, x = aw_379_cast_fp16)[name = tensor<string, []>("op_2710_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2711_cast_fp16 = softmax(axis = var_2545, x = aw_381_cast_fp16)[name = tensor<string, []>("op_2711_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2712_cast_fp16 = softmax(axis = var_2545, x = aw_383_cast_fp16)[name = tensor<string, []>("op_2712_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2713_cast_fp16 = softmax(axis = var_2545, x = aw_385_cast_fp16)[name = tensor<string, []>("op_2713_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2714_cast_fp16 = softmax(axis = var_2545, x = aw_387_cast_fp16)[name = tensor<string, []>("op_2714_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2715_cast_fp16 = softmax(axis = var_2545, x = aw_389_cast_fp16)[name = tensor<string, []>("op_2715_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2716_cast_fp16 = softmax(axis = var_2545, x = aw_391_cast_fp16)[name = tensor<string, []>("op_2716_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2717_cast_fp16 = softmax(axis = var_2545, x = aw_393_cast_fp16)[name = tensor<string, []>("op_2717_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2718_cast_fp16 = softmax(axis = var_2545, x = aw_395_cast_fp16)[name = tensor<string, []>("op_2718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2719_cast_fp16 = softmax(axis = var_2545, x = aw_397_cast_fp16)[name = tensor<string, []>("op_2719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2720_cast_fp16 = softmax(axis = var_2545, x = aw_399_cast_fp16)[name = tensor<string, []>("op_2720_cast_fp16")];
+            tensor<string, []> var_2722_equation_0 = const()[name = tensor<string, []>("op_2722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2640_cast_fp16_0, var_2701_cast_fp16))[name = tensor<string, []>("op_2722_cast_fp16")];
+            tensor<string, []> var_2724_equation_0 = const()[name = tensor<string, []>("op_2724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2640_cast_fp16_1, var_2702_cast_fp16))[name = tensor<string, []>("op_2724_cast_fp16")];
+            tensor<string, []> var_2726_equation_0 = const()[name = tensor<string, []>("op_2726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2640_cast_fp16_2, var_2703_cast_fp16))[name = tensor<string, []>("op_2726_cast_fp16")];
+            tensor<string, []> var_2728_equation_0 = const()[name = tensor<string, []>("op_2728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2728_cast_fp16 = einsum(equation = var_2728_equation_0, values = (var_2640_cast_fp16_3, var_2704_cast_fp16))[name = tensor<string, []>("op_2728_cast_fp16")];
+            tensor<string, []> var_2730_equation_0 = const()[name = tensor<string, []>("op_2730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2730_cast_fp16 = einsum(equation = var_2730_equation_0, values = (var_2640_cast_fp16_4, var_2705_cast_fp16))[name = tensor<string, []>("op_2730_cast_fp16")];
+            tensor<string, []> var_2732_equation_0 = const()[name = tensor<string, []>("op_2732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2732_cast_fp16 = einsum(equation = var_2732_equation_0, values = (var_2640_cast_fp16_5, var_2706_cast_fp16))[name = tensor<string, []>("op_2732_cast_fp16")];
+            tensor<string, []> var_2734_equation_0 = const()[name = tensor<string, []>("op_2734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2734_cast_fp16 = einsum(equation = var_2734_equation_0, values = (var_2640_cast_fp16_6, var_2707_cast_fp16))[name = tensor<string, []>("op_2734_cast_fp16")];
+            tensor<string, []> var_2736_equation_0 = const()[name = tensor<string, []>("op_2736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2736_cast_fp16 = einsum(equation = var_2736_equation_0, values = (var_2640_cast_fp16_7, var_2708_cast_fp16))[name = tensor<string, []>("op_2736_cast_fp16")];
+            tensor<string, []> var_2738_equation_0 = const()[name = tensor<string, []>("op_2738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2738_cast_fp16 = einsum(equation = var_2738_equation_0, values = (var_2640_cast_fp16_8, var_2709_cast_fp16))[name = tensor<string, []>("op_2738_cast_fp16")];
+            tensor<string, []> var_2740_equation_0 = const()[name = tensor<string, []>("op_2740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2740_cast_fp16 = einsum(equation = var_2740_equation_0, values = (var_2640_cast_fp16_9, var_2710_cast_fp16))[name = tensor<string, []>("op_2740_cast_fp16")];
+            tensor<string, []> var_2742_equation_0 = const()[name = tensor<string, []>("op_2742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2742_cast_fp16 = einsum(equation = var_2742_equation_0, values = (var_2640_cast_fp16_10, var_2711_cast_fp16))[name = tensor<string, []>("op_2742_cast_fp16")];
+            tensor<string, []> var_2744_equation_0 = const()[name = tensor<string, []>("op_2744_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2744_cast_fp16 = einsum(equation = var_2744_equation_0, values = (var_2640_cast_fp16_11, var_2712_cast_fp16))[name = tensor<string, []>("op_2744_cast_fp16")];
+            tensor<string, []> var_2746_equation_0 = const()[name = tensor<string, []>("op_2746_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2746_cast_fp16 = einsum(equation = var_2746_equation_0, values = (var_2640_cast_fp16_12, var_2713_cast_fp16))[name = tensor<string, []>("op_2746_cast_fp16")];
+            tensor<string, []> var_2748_equation_0 = const()[name = tensor<string, []>("op_2748_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2748_cast_fp16 = einsum(equation = var_2748_equation_0, values = (var_2640_cast_fp16_13, var_2714_cast_fp16))[name = tensor<string, []>("op_2748_cast_fp16")];
+            tensor<string, []> var_2750_equation_0 = const()[name = tensor<string, []>("op_2750_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2750_cast_fp16 = einsum(equation = var_2750_equation_0, values = (var_2640_cast_fp16_14, var_2715_cast_fp16))[name = tensor<string, []>("op_2750_cast_fp16")];
+            tensor<string, []> var_2752_equation_0 = const()[name = tensor<string, []>("op_2752_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2752_cast_fp16 = einsum(equation = var_2752_equation_0, values = (var_2640_cast_fp16_15, var_2716_cast_fp16))[name = tensor<string, []>("op_2752_cast_fp16")];
+            tensor<string, []> var_2754_equation_0 = const()[name = tensor<string, []>("op_2754_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2754_cast_fp16 = einsum(equation = var_2754_equation_0, values = (var_2640_cast_fp16_16, var_2717_cast_fp16))[name = tensor<string, []>("op_2754_cast_fp16")];
+            tensor<string, []> var_2756_equation_0 = const()[name = tensor<string, []>("op_2756_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2756_cast_fp16 = einsum(equation = var_2756_equation_0, values = (var_2640_cast_fp16_17, var_2718_cast_fp16))[name = tensor<string, []>("op_2756_cast_fp16")];
+            tensor<string, []> var_2758_equation_0 = const()[name = tensor<string, []>("op_2758_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2758_cast_fp16 = einsum(equation = var_2758_equation_0, values = (var_2640_cast_fp16_18, var_2719_cast_fp16))[name = tensor<string, []>("op_2758_cast_fp16")];
+            tensor<string, []> var_2760_equation_0 = const()[name = tensor<string, []>("op_2760_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2760_cast_fp16 = einsum(equation = var_2760_equation_0, values = (var_2640_cast_fp16_19, var_2720_cast_fp16))[name = tensor<string, []>("op_2760_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_95_cast_fp16 = concat(axis = var_2545, interleave = input_95_interleave_0, values = (var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16, var_2728_cast_fp16, var_2730_cast_fp16, var_2732_cast_fp16, var_2734_cast_fp16, var_2736_cast_fp16, var_2738_cast_fp16, var_2740_cast_fp16, var_2742_cast_fp16, var_2744_cast_fp16, var_2746_cast_fp16, var_2748_cast_fp16, var_2750_cast_fp16, var_2752_cast_fp16, var_2754_cast_fp16, var_2756_cast_fp16, var_2758_cast_fp16, var_2760_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2769_pad_type_0 = const()[name = tensor<string, []>("op_2769_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2769_strides_0 = const()[name = tensor<string, []>("op_2769_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2769_pad_0 = const()[name = tensor<string, []>("op_2769_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2769_dilations_0 = const()[name = tensor<string, []>("op_2769_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2769_groups_0 = const()[name = tensor<string, []>("op_2769_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378310912)))];
+            tensor<fp16, [1280]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381587776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2769_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2769_dilations_0, groups = var_2769_groups_0, pad = var_2769_pad_0, pad_type = var_2769_pad_type_0, strides = var_2769_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2769_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2769_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381590400)))];
+            tensor<fp16, [1280]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381593024)))];
+            tensor<fp16, []> var_2779_to_fp16 = const()[name = tensor<string, []>("op_2779_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2779_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381595648)))];
+            tensor<fp16, [5120]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394702912)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2805_pad_type_0 = const()[name = tensor<string, []>("op_2805_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2805_strides_0 = const()[name = tensor<string, []>("op_2805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2805_pad_0 = const()[name = tensor<string, []>("op_2805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2805_dilations_0 = const()[name = tensor<string, []>("op_2805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2805_groups_0 = const()[name = tensor<string, []>("op_2805_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394713216)))];
+            tensor<fp16, [1280]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407820480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2805_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2805_dilations_0, groups = var_2805_groups_0, pad = var_2805_pad_0, pad_type = var_2805_pad_type_0, strides = var_2805_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2805_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2805_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2814 = const()[name = tensor<string, []>("op_2814"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407823104)))];
+            tensor<fp16, [1280]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407825728)))];
+            tensor<fp16, []> var_2830_to_fp16 = const()[name = tensor<string, []>("op_2830_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2830_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2865_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2865_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407828352)))];
+            tensor<fp16, [1280]> var_2865_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2865_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411105216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2865_cast_fp16 = conv(bias = var_2865_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2865_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411107840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2863_pad_type_0 = const()[name = tensor<string, []>("op_2863_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2863_strides_0 = const()[name = tensor<string, []>("op_2863_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2863_pad_0 = const()[name = tensor<string, []>("op_2863_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2863_dilations_0 = const()[name = tensor<string, []>("op_2863_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2863_groups_0 = const()[name = tensor<string, []>("op_2863_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(414384704)))];
+            tensor<fp16, [1280]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417661568)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2863_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2863_dilations_0, groups = var_2863_groups_0, pad = var_2863_pad_0, pad_type = var_2863_pad_type_0, strides = var_2863_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<int32, [20]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2866_axis_0 = const()[name = tensor<string, []>("op_2866_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_19 = split(axis = var_2866_axis_0, split_sizes = tile_30, x = var_2865_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<int32, [4]> var_2887_perm_0 = const()[name = tensor<string, []>("op_2887_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2888_axis_0 = const()[name = tensor<string, []>("op_2888_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2887_cast_fp16 = transpose(perm = var_2887_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_22")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_19 = split(axis = var_2888_axis_0, split_sizes = tile_31, x = var_2887_cast_fp16)[name = tensor<string, []>("op_2888_cast_fp16")];
+            tensor<int32, [20]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2909_axis_0 = const()[name = tensor<string, []>("op_2909_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_19 = split(axis = var_2909_axis_0, split_sizes = tile_32, x = var_2863_cast_fp16)[name = tensor<string, []>("op_2909_cast_fp16")];
+            tensor<string, []> aw_401_equation_0 = const()[name = tensor<string, []>("aw_401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_401_cast_fp16 = einsum(equation = aw_401_equation_0, values = (var_2888_cast_fp16_0, var_2866_cast_fp16_0))[name = tensor<string, []>("aw_401_cast_fp16")];
+            tensor<string, []> aw_403_equation_0 = const()[name = tensor<string, []>("aw_403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_403_cast_fp16 = einsum(equation = aw_403_equation_0, values = (var_2888_cast_fp16_1, var_2866_cast_fp16_1))[name = tensor<string, []>("aw_403_cast_fp16")];
+            tensor<string, []> aw_405_equation_0 = const()[name = tensor<string, []>("aw_405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_405_cast_fp16 = einsum(equation = aw_405_equation_0, values = (var_2888_cast_fp16_2, var_2866_cast_fp16_2))[name = tensor<string, []>("aw_405_cast_fp16")];
+            tensor<string, []> aw_407_equation_0 = const()[name = tensor<string, []>("aw_407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_407_cast_fp16 = einsum(equation = aw_407_equation_0, values = (var_2888_cast_fp16_3, var_2866_cast_fp16_3))[name = tensor<string, []>("aw_407_cast_fp16")];
+            tensor<string, []> aw_409_equation_0 = const()[name = tensor<string, []>("aw_409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_409_cast_fp16 = einsum(equation = aw_409_equation_0, values = (var_2888_cast_fp16_4, var_2866_cast_fp16_4))[name = tensor<string, []>("aw_409_cast_fp16")];
+            tensor<string, []> aw_411_equation_0 = const()[name = tensor<string, []>("aw_411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_411_cast_fp16 = einsum(equation = aw_411_equation_0, values = (var_2888_cast_fp16_5, var_2866_cast_fp16_5))[name = tensor<string, []>("aw_411_cast_fp16")];
+            tensor<string, []> aw_413_equation_0 = const()[name = tensor<string, []>("aw_413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_413_cast_fp16 = einsum(equation = aw_413_equation_0, values = (var_2888_cast_fp16_6, var_2866_cast_fp16_6))[name = tensor<string, []>("aw_413_cast_fp16")];
+            tensor<string, []> aw_415_equation_0 = const()[name = tensor<string, []>("aw_415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_415_cast_fp16 = einsum(equation = aw_415_equation_0, values = (var_2888_cast_fp16_7, var_2866_cast_fp16_7))[name = tensor<string, []>("aw_415_cast_fp16")];
+            tensor<string, []> aw_417_equation_0 = const()[name = tensor<string, []>("aw_417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_417_cast_fp16 = einsum(equation = aw_417_equation_0, values = (var_2888_cast_fp16_8, var_2866_cast_fp16_8))[name = tensor<string, []>("aw_417_cast_fp16")];
+            tensor<string, []> aw_419_equation_0 = const()[name = tensor<string, []>("aw_419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_419_cast_fp16 = einsum(equation = aw_419_equation_0, values = (var_2888_cast_fp16_9, var_2866_cast_fp16_9))[name = tensor<string, []>("aw_419_cast_fp16")];
+            tensor<string, []> aw_421_equation_0 = const()[name = tensor<string, []>("aw_421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_421_cast_fp16 = einsum(equation = aw_421_equation_0, values = (var_2888_cast_fp16_10, var_2866_cast_fp16_10))[name = tensor<string, []>("aw_421_cast_fp16")];
+            tensor<string, []> aw_423_equation_0 = const()[name = tensor<string, []>("aw_423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_423_cast_fp16 = einsum(equation = aw_423_equation_0, values = (var_2888_cast_fp16_11, var_2866_cast_fp16_11))[name = tensor<string, []>("aw_423_cast_fp16")];
+            tensor<string, []> aw_425_equation_0 = const()[name = tensor<string, []>("aw_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_425_cast_fp16 = einsum(equation = aw_425_equation_0, values = (var_2888_cast_fp16_12, var_2866_cast_fp16_12))[name = tensor<string, []>("aw_425_cast_fp16")];
+            tensor<string, []> aw_427_equation_0 = const()[name = tensor<string, []>("aw_427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_427_cast_fp16 = einsum(equation = aw_427_equation_0, values = (var_2888_cast_fp16_13, var_2866_cast_fp16_13))[name = tensor<string, []>("aw_427_cast_fp16")];
+            tensor<string, []> aw_429_equation_0 = const()[name = tensor<string, []>("aw_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_429_cast_fp16 = einsum(equation = aw_429_equation_0, values = (var_2888_cast_fp16_14, var_2866_cast_fp16_14))[name = tensor<string, []>("aw_429_cast_fp16")];
+            tensor<string, []> aw_431_equation_0 = const()[name = tensor<string, []>("aw_431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_431_cast_fp16 = einsum(equation = aw_431_equation_0, values = (var_2888_cast_fp16_15, var_2866_cast_fp16_15))[name = tensor<string, []>("aw_431_cast_fp16")];
+            tensor<string, []> aw_433_equation_0 = const()[name = tensor<string, []>("aw_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_433_cast_fp16 = einsum(equation = aw_433_equation_0, values = (var_2888_cast_fp16_16, var_2866_cast_fp16_16))[name = tensor<string, []>("aw_433_cast_fp16")];
+            tensor<string, []> aw_435_equation_0 = const()[name = tensor<string, []>("aw_435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_435_cast_fp16 = einsum(equation = aw_435_equation_0, values = (var_2888_cast_fp16_17, var_2866_cast_fp16_17))[name = tensor<string, []>("aw_435_cast_fp16")];
+            tensor<string, []> aw_437_equation_0 = const()[name = tensor<string, []>("aw_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_437_cast_fp16 = einsum(equation = aw_437_equation_0, values = (var_2888_cast_fp16_18, var_2866_cast_fp16_18))[name = tensor<string, []>("aw_437_cast_fp16")];
+            tensor<string, []> aw_439_equation_0 = const()[name = tensor<string, []>("aw_439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_439_cast_fp16 = einsum(equation = aw_439_equation_0, values = (var_2888_cast_fp16_19, var_2866_cast_fp16_19))[name = tensor<string, []>("aw_439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2970_cast_fp16 = softmax(axis = var_2814, x = aw_401_cast_fp16)[name = tensor<string, []>("op_2970_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2971_cast_fp16 = softmax(axis = var_2814, x = aw_403_cast_fp16)[name = tensor<string, []>("op_2971_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2972_cast_fp16 = softmax(axis = var_2814, x = aw_405_cast_fp16)[name = tensor<string, []>("op_2972_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2973_cast_fp16 = softmax(axis = var_2814, x = aw_407_cast_fp16)[name = tensor<string, []>("op_2973_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2974_cast_fp16 = softmax(axis = var_2814, x = aw_409_cast_fp16)[name = tensor<string, []>("op_2974_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2975_cast_fp16 = softmax(axis = var_2814, x = aw_411_cast_fp16)[name = tensor<string, []>("op_2975_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2976_cast_fp16 = softmax(axis = var_2814, x = aw_413_cast_fp16)[name = tensor<string, []>("op_2976_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2977_cast_fp16 = softmax(axis = var_2814, x = aw_415_cast_fp16)[name = tensor<string, []>("op_2977_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2978_cast_fp16 = softmax(axis = var_2814, x = aw_417_cast_fp16)[name = tensor<string, []>("op_2978_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2979_cast_fp16 = softmax(axis = var_2814, x = aw_419_cast_fp16)[name = tensor<string, []>("op_2979_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2980_cast_fp16 = softmax(axis = var_2814, x = aw_421_cast_fp16)[name = tensor<string, []>("op_2980_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2981_cast_fp16 = softmax(axis = var_2814, x = aw_423_cast_fp16)[name = tensor<string, []>("op_2981_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2982_cast_fp16 = softmax(axis = var_2814, x = aw_425_cast_fp16)[name = tensor<string, []>("op_2982_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2983_cast_fp16 = softmax(axis = var_2814, x = aw_427_cast_fp16)[name = tensor<string, []>("op_2983_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2984_cast_fp16 = softmax(axis = var_2814, x = aw_429_cast_fp16)[name = tensor<string, []>("op_2984_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2985_cast_fp16 = softmax(axis = var_2814, x = aw_431_cast_fp16)[name = tensor<string, []>("op_2985_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2986_cast_fp16 = softmax(axis = var_2814, x = aw_433_cast_fp16)[name = tensor<string, []>("op_2986_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2987_cast_fp16 = softmax(axis = var_2814, x = aw_435_cast_fp16)[name = tensor<string, []>("op_2987_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2988_cast_fp16 = softmax(axis = var_2814, x = aw_437_cast_fp16)[name = tensor<string, []>("op_2988_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2989_cast_fp16 = softmax(axis = var_2814, x = aw_439_cast_fp16)[name = tensor<string, []>("op_2989_cast_fp16")];
+            tensor<string, []> var_2991_equation_0 = const()[name = tensor<string, []>("op_2991_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2991_cast_fp16 = einsum(equation = var_2991_equation_0, values = (var_2909_cast_fp16_0, var_2970_cast_fp16))[name = tensor<string, []>("op_2991_cast_fp16")];
+            tensor<string, []> var_2993_equation_0 = const()[name = tensor<string, []>("op_2993_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2993_cast_fp16 = einsum(equation = var_2993_equation_0, values = (var_2909_cast_fp16_1, var_2971_cast_fp16))[name = tensor<string, []>("op_2993_cast_fp16")];
+            tensor<string, []> var_2995_equation_0 = const()[name = tensor<string, []>("op_2995_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2995_cast_fp16 = einsum(equation = var_2995_equation_0, values = (var_2909_cast_fp16_2, var_2972_cast_fp16))[name = tensor<string, []>("op_2995_cast_fp16")];
+            tensor<string, []> var_2997_equation_0 = const()[name = tensor<string, []>("op_2997_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2997_cast_fp16 = einsum(equation = var_2997_equation_0, values = (var_2909_cast_fp16_3, var_2973_cast_fp16))[name = tensor<string, []>("op_2997_cast_fp16")];
+            tensor<string, []> var_2999_equation_0 = const()[name = tensor<string, []>("op_2999_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2999_cast_fp16 = einsum(equation = var_2999_equation_0, values = (var_2909_cast_fp16_4, var_2974_cast_fp16))[name = tensor<string, []>("op_2999_cast_fp16")];
+            tensor<string, []> var_3001_equation_0 = const()[name = tensor<string, []>("op_3001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3001_cast_fp16 = einsum(equation = var_3001_equation_0, values = (var_2909_cast_fp16_5, var_2975_cast_fp16))[name = tensor<string, []>("op_3001_cast_fp16")];
+            tensor<string, []> var_3003_equation_0 = const()[name = tensor<string, []>("op_3003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3003_cast_fp16 = einsum(equation = var_3003_equation_0, values = (var_2909_cast_fp16_6, var_2976_cast_fp16))[name = tensor<string, []>("op_3003_cast_fp16")];
+            tensor<string, []> var_3005_equation_0 = const()[name = tensor<string, []>("op_3005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3005_cast_fp16 = einsum(equation = var_3005_equation_0, values = (var_2909_cast_fp16_7, var_2977_cast_fp16))[name = tensor<string, []>("op_3005_cast_fp16")];
+            tensor<string, []> var_3007_equation_0 = const()[name = tensor<string, []>("op_3007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3007_cast_fp16 = einsum(equation = var_3007_equation_0, values = (var_2909_cast_fp16_8, var_2978_cast_fp16))[name = tensor<string, []>("op_3007_cast_fp16")];
+            tensor<string, []> var_3009_equation_0 = const()[name = tensor<string, []>("op_3009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3009_cast_fp16 = einsum(equation = var_3009_equation_0, values = (var_2909_cast_fp16_9, var_2979_cast_fp16))[name = tensor<string, []>("op_3009_cast_fp16")];
+            tensor<string, []> var_3011_equation_0 = const()[name = tensor<string, []>("op_3011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3011_cast_fp16 = einsum(equation = var_3011_equation_0, values = (var_2909_cast_fp16_10, var_2980_cast_fp16))[name = tensor<string, []>("op_3011_cast_fp16")];
+            tensor<string, []> var_3013_equation_0 = const()[name = tensor<string, []>("op_3013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3013_cast_fp16 = einsum(equation = var_3013_equation_0, values = (var_2909_cast_fp16_11, var_2981_cast_fp16))[name = tensor<string, []>("op_3013_cast_fp16")];
+            tensor<string, []> var_3015_equation_0 = const()[name = tensor<string, []>("op_3015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3015_cast_fp16 = einsum(equation = var_3015_equation_0, values = (var_2909_cast_fp16_12, var_2982_cast_fp16))[name = tensor<string, []>("op_3015_cast_fp16")];
+            tensor<string, []> var_3017_equation_0 = const()[name = tensor<string, []>("op_3017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3017_cast_fp16 = einsum(equation = var_3017_equation_0, values = (var_2909_cast_fp16_13, var_2983_cast_fp16))[name = tensor<string, []>("op_3017_cast_fp16")];
+            tensor<string, []> var_3019_equation_0 = const()[name = tensor<string, []>("op_3019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3019_cast_fp16 = einsum(equation = var_3019_equation_0, values = (var_2909_cast_fp16_14, var_2984_cast_fp16))[name = tensor<string, []>("op_3019_cast_fp16")];
+            tensor<string, []> var_3021_equation_0 = const()[name = tensor<string, []>("op_3021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3021_cast_fp16 = einsum(equation = var_3021_equation_0, values = (var_2909_cast_fp16_15, var_2985_cast_fp16))[name = tensor<string, []>("op_3021_cast_fp16")];
+            tensor<string, []> var_3023_equation_0 = const()[name = tensor<string, []>("op_3023_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3023_cast_fp16 = einsum(equation = var_3023_equation_0, values = (var_2909_cast_fp16_16, var_2986_cast_fp16))[name = tensor<string, []>("op_3023_cast_fp16")];
+            tensor<string, []> var_3025_equation_0 = const()[name = tensor<string, []>("op_3025_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3025_cast_fp16 = einsum(equation = var_3025_equation_0, values = (var_2909_cast_fp16_17, var_2987_cast_fp16))[name = tensor<string, []>("op_3025_cast_fp16")];
+            tensor<string, []> var_3027_equation_0 = const()[name = tensor<string, []>("op_3027_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3027_cast_fp16 = einsum(equation = var_3027_equation_0, values = (var_2909_cast_fp16_18, var_2988_cast_fp16))[name = tensor<string, []>("op_3027_cast_fp16")];
+            tensor<string, []> var_3029_equation_0 = const()[name = tensor<string, []>("op_3029_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3029_cast_fp16 = einsum(equation = var_3029_equation_0, values = (var_2909_cast_fp16_19, var_2989_cast_fp16))[name = tensor<string, []>("op_3029_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2814, interleave = input_105_interleave_0, values = (var_2991_cast_fp16, var_2993_cast_fp16, var_2995_cast_fp16, var_2997_cast_fp16, var_2999_cast_fp16, var_3001_cast_fp16, var_3003_cast_fp16, var_3005_cast_fp16, var_3007_cast_fp16, var_3009_cast_fp16, var_3011_cast_fp16, var_3013_cast_fp16, var_3015_cast_fp16, var_3017_cast_fp16, var_3019_cast_fp16, var_3021_cast_fp16, var_3023_cast_fp16, var_3025_cast_fp16, var_3027_cast_fp16, var_3029_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_3038_pad_type_0 = const()[name = tensor<string, []>("op_3038_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3038_strides_0 = const()[name = tensor<string, []>("op_3038_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3038_pad_0 = const()[name = tensor<string, []>("op_3038_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3038_dilations_0 = const()[name = tensor<string, []>("op_3038_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3038_groups_0 = const()[name = tensor<string, []>("op_3038_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417664192)))];
+            tensor<fp16, [1280]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420941056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3038_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_3038_dilations_0, groups = var_3038_groups_0, pad = var_3038_pad_0, pad_type = var_3038_pad_type_0, strides = var_3038_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_3038_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_3038_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420943680)))];
+            tensor<fp16, [1280]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420946304)))];
+            tensor<fp16, []> var_3048_to_fp16 = const()[name = tensor<string, []>("op_3048_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_3048_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420948928)))];
+            tensor<fp16, [5120]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434056192)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_3074_pad_type_0 = const()[name = tensor<string, []>("op_3074_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3074_strides_0 = const()[name = tensor<string, []>("op_3074_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3074_pad_0 = const()[name = tensor<string, []>("op_3074_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3074_dilations_0 = const()[name = tensor<string, []>("op_3074_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3074_groups_0 = const()[name = tensor<string, []>("op_3074_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434066496)))];
+            tensor<fp16, [1280]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447173760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3074_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_3074_dilations_0, groups = var_3074_groups_0, pad = var_3074_pad_0, pad_type = var_3074_pad_type_0, strides = var_3074_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_3074_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_3074_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_3083 = const()[name = tensor<string, []>("op_3083"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447176384)))];
+            tensor<fp16, [1280]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447179008)))];
+            tensor<fp16, []> var_3099_to_fp16 = const()[name = tensor<string, []>("op_3099_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_3099_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_23_pad_type_0 = const()[name = tensor<string, []>("q_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_23_strides_0 = const()[name = tensor<string, []>("q_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_23_pad_0 = const()[name = tensor<string, []>("q_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_23_dilations_0 = const()[name = tensor<string, []>("q_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_23_groups_0 = const()[name = tensor<string, []>("q_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3134_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3134_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447181632)))];
+            tensor<fp16, [1280]> var_3134_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3134_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450458496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3134_cast_fp16 = conv(bias = var_3134_bias_0_to_fp16, dilations = q_23_dilations_0, groups = q_23_groups_0, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = q_23_strides_0, weight = var_3134_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3134_cast_fp16")];
+            tensor<string, []> k_23_pad_type_0 = const()[name = tensor<string, []>("k_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_23_strides_0 = const()[name = tensor<string, []>("k_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_23_pad_0 = const()[name = tensor<string, []>("k_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_23_dilations_0 = const()[name = tensor<string, []>("k_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_23_groups_0 = const()[name = tensor<string, []>("k_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450461120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_23_cast_fp16 = conv(dilations = k_23_dilations_0, groups = k_23_groups_0, pad = k_23_pad_0, pad_type = k_23_pad_type_0, strides = k_23_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_23_cast_fp16")];
+            tensor<string, []> var_3132_pad_type_0 = const()[name = tensor<string, []>("op_3132_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3132_strides_0 = const()[name = tensor<string, []>("op_3132_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3132_pad_0 = const()[name = tensor<string, []>("op_3132_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3132_dilations_0 = const()[name = tensor<string, []>("op_3132_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3132_groups_0 = const()[name = tensor<string, []>("op_3132_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(453737984)))];
+            tensor<fp16, [1280]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457014848)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3132_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_3132_dilations_0, groups = var_3132_groups_0, pad = var_3132_pad_0, pad_type = var_3132_pad_type_0, strides = var_3132_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3132_cast_fp16")];
+            tensor<int32, [20]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3135_axis_0 = const()[name = tensor<string, []>("op_3135_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_19 = split(axis = var_3135_axis_0, split_sizes = tile_33, x = var_3134_cast_fp16)[name = tensor<string, []>("op_3135_cast_fp16")];
+            tensor<int32, [4]> var_3156_perm_0 = const()[name = tensor<string, []>("op_3156_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3157_axis_0 = const()[name = tensor<string, []>("op_3157_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3156_cast_fp16 = transpose(perm = var_3156_perm_0, x = k_23_cast_fp16)[name = tensor<string, []>("transpose_21")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_19 = split(axis = var_3157_axis_0, split_sizes = tile_34, x = var_3156_cast_fp16)[name = tensor<string, []>("op_3157_cast_fp16")];
+            tensor<int32, [20]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3178_axis_0 = const()[name = tensor<string, []>("op_3178_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_19 = split(axis = var_3178_axis_0, split_sizes = tile_35, x = var_3132_cast_fp16)[name = tensor<string, []>("op_3178_cast_fp16")];
+            tensor<string, []> aw_441_equation_0 = const()[name = tensor<string, []>("aw_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_441_cast_fp16 = einsum(equation = aw_441_equation_0, values = (var_3157_cast_fp16_0, var_3135_cast_fp16_0))[name = tensor<string, []>("aw_441_cast_fp16")];
+            tensor<string, []> aw_443_equation_0 = const()[name = tensor<string, []>("aw_443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_443_cast_fp16 = einsum(equation = aw_443_equation_0, values = (var_3157_cast_fp16_1, var_3135_cast_fp16_1))[name = tensor<string, []>("aw_443_cast_fp16")];
+            tensor<string, []> aw_445_equation_0 = const()[name = tensor<string, []>("aw_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_445_cast_fp16 = einsum(equation = aw_445_equation_0, values = (var_3157_cast_fp16_2, var_3135_cast_fp16_2))[name = tensor<string, []>("aw_445_cast_fp16")];
+            tensor<string, []> aw_447_equation_0 = const()[name = tensor<string, []>("aw_447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_447_cast_fp16 = einsum(equation = aw_447_equation_0, values = (var_3157_cast_fp16_3, var_3135_cast_fp16_3))[name = tensor<string, []>("aw_447_cast_fp16")];
+            tensor<string, []> aw_449_equation_0 = const()[name = tensor<string, []>("aw_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_449_cast_fp16 = einsum(equation = aw_449_equation_0, values = (var_3157_cast_fp16_4, var_3135_cast_fp16_4))[name = tensor<string, []>("aw_449_cast_fp16")];
+            tensor<string, []> aw_451_equation_0 = const()[name = tensor<string, []>("aw_451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_451_cast_fp16 = einsum(equation = aw_451_equation_0, values = (var_3157_cast_fp16_5, var_3135_cast_fp16_5))[name = tensor<string, []>("aw_451_cast_fp16")];
+            tensor<string, []> aw_453_equation_0 = const()[name = tensor<string, []>("aw_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_453_cast_fp16 = einsum(equation = aw_453_equation_0, values = (var_3157_cast_fp16_6, var_3135_cast_fp16_6))[name = tensor<string, []>("aw_453_cast_fp16")];
+            tensor<string, []> aw_455_equation_0 = const()[name = tensor<string, []>("aw_455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_455_cast_fp16 = einsum(equation = aw_455_equation_0, values = (var_3157_cast_fp16_7, var_3135_cast_fp16_7))[name = tensor<string, []>("aw_455_cast_fp16")];
+            tensor<string, []> aw_457_equation_0 = const()[name = tensor<string, []>("aw_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_457_cast_fp16 = einsum(equation = aw_457_equation_0, values = (var_3157_cast_fp16_8, var_3135_cast_fp16_8))[name = tensor<string, []>("aw_457_cast_fp16")];
+            tensor<string, []> aw_459_equation_0 = const()[name = tensor<string, []>("aw_459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_459_cast_fp16 = einsum(equation = aw_459_equation_0, values = (var_3157_cast_fp16_9, var_3135_cast_fp16_9))[name = tensor<string, []>("aw_459_cast_fp16")];
+            tensor<string, []> aw_461_equation_0 = const()[name = tensor<string, []>("aw_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_461_cast_fp16 = einsum(equation = aw_461_equation_0, values = (var_3157_cast_fp16_10, var_3135_cast_fp16_10))[name = tensor<string, []>("aw_461_cast_fp16")];
+            tensor<string, []> aw_463_equation_0 = const()[name = tensor<string, []>("aw_463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_463_cast_fp16 = einsum(equation = aw_463_equation_0, values = (var_3157_cast_fp16_11, var_3135_cast_fp16_11))[name = tensor<string, []>("aw_463_cast_fp16")];
+            tensor<string, []> aw_465_equation_0 = const()[name = tensor<string, []>("aw_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_465_cast_fp16 = einsum(equation = aw_465_equation_0, values = (var_3157_cast_fp16_12, var_3135_cast_fp16_12))[name = tensor<string, []>("aw_465_cast_fp16")];
+            tensor<string, []> aw_467_equation_0 = const()[name = tensor<string, []>("aw_467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_467_cast_fp16 = einsum(equation = aw_467_equation_0, values = (var_3157_cast_fp16_13, var_3135_cast_fp16_13))[name = tensor<string, []>("aw_467_cast_fp16")];
+            tensor<string, []> aw_469_equation_0 = const()[name = tensor<string, []>("aw_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_469_cast_fp16 = einsum(equation = aw_469_equation_0, values = (var_3157_cast_fp16_14, var_3135_cast_fp16_14))[name = tensor<string, []>("aw_469_cast_fp16")];
+            tensor<string, []> aw_471_equation_0 = const()[name = tensor<string, []>("aw_471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_471_cast_fp16 = einsum(equation = aw_471_equation_0, values = (var_3157_cast_fp16_15, var_3135_cast_fp16_15))[name = tensor<string, []>("aw_471_cast_fp16")];
+            tensor<string, []> aw_473_equation_0 = const()[name = tensor<string, []>("aw_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_473_cast_fp16 = einsum(equation = aw_473_equation_0, values = (var_3157_cast_fp16_16, var_3135_cast_fp16_16))[name = tensor<string, []>("aw_473_cast_fp16")];
+            tensor<string, []> aw_475_equation_0 = const()[name = tensor<string, []>("aw_475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_475_cast_fp16 = einsum(equation = aw_475_equation_0, values = (var_3157_cast_fp16_17, var_3135_cast_fp16_17))[name = tensor<string, []>("aw_475_cast_fp16")];
+            tensor<string, []> aw_477_equation_0 = const()[name = tensor<string, []>("aw_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_477_cast_fp16 = einsum(equation = aw_477_equation_0, values = (var_3157_cast_fp16_18, var_3135_cast_fp16_18))[name = tensor<string, []>("aw_477_cast_fp16")];
+            tensor<string, []> aw_479_equation_0 = const()[name = tensor<string, []>("aw_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_479_cast_fp16 = einsum(equation = aw_479_equation_0, values = (var_3157_cast_fp16_19, var_3135_cast_fp16_19))[name = tensor<string, []>("aw_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3239_cast_fp16 = softmax(axis = var_3083, x = aw_441_cast_fp16)[name = tensor<string, []>("op_3239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3240_cast_fp16 = softmax(axis = var_3083, x = aw_443_cast_fp16)[name = tensor<string, []>("op_3240_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3241_cast_fp16 = softmax(axis = var_3083, x = aw_445_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3242_cast_fp16 = softmax(axis = var_3083, x = aw_447_cast_fp16)[name = tensor<string, []>("op_3242_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3243_cast_fp16 = softmax(axis = var_3083, x = aw_449_cast_fp16)[name = tensor<string, []>("op_3243_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3244_cast_fp16 = softmax(axis = var_3083, x = aw_451_cast_fp16)[name = tensor<string, []>("op_3244_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3245_cast_fp16 = softmax(axis = var_3083, x = aw_453_cast_fp16)[name = tensor<string, []>("op_3245_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3246_cast_fp16 = softmax(axis = var_3083, x = aw_455_cast_fp16)[name = tensor<string, []>("op_3246_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3247_cast_fp16 = softmax(axis = var_3083, x = aw_457_cast_fp16)[name = tensor<string, []>("op_3247_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3248_cast_fp16 = softmax(axis = var_3083, x = aw_459_cast_fp16)[name = tensor<string, []>("op_3248_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3249_cast_fp16 = softmax(axis = var_3083, x = aw_461_cast_fp16)[name = tensor<string, []>("op_3249_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3250_cast_fp16 = softmax(axis = var_3083, x = aw_463_cast_fp16)[name = tensor<string, []>("op_3250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3251_cast_fp16 = softmax(axis = var_3083, x = aw_465_cast_fp16)[name = tensor<string, []>("op_3251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3252_cast_fp16 = softmax(axis = var_3083, x = aw_467_cast_fp16)[name = tensor<string, []>("op_3252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3253_cast_fp16 = softmax(axis = var_3083, x = aw_469_cast_fp16)[name = tensor<string, []>("op_3253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3254_cast_fp16 = softmax(axis = var_3083, x = aw_471_cast_fp16)[name = tensor<string, []>("op_3254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3255_cast_fp16 = softmax(axis = var_3083, x = aw_473_cast_fp16)[name = tensor<string, []>("op_3255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3256_cast_fp16 = softmax(axis = var_3083, x = aw_475_cast_fp16)[name = tensor<string, []>("op_3256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3257_cast_fp16 = softmax(axis = var_3083, x = aw_477_cast_fp16)[name = tensor<string, []>("op_3257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3258_cast_fp16 = softmax(axis = var_3083, x = aw_479_cast_fp16)[name = tensor<string, []>("op_3258_cast_fp16")];
+            tensor<string, []> var_3260_equation_0 = const()[name = tensor<string, []>("op_3260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3260_cast_fp16 = einsum(equation = var_3260_equation_0, values = (var_3178_cast_fp16_0, var_3239_cast_fp16))[name = tensor<string, []>("op_3260_cast_fp16")];
+            tensor<string, []> var_3262_equation_0 = const()[name = tensor<string, []>("op_3262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3262_cast_fp16 = einsum(equation = var_3262_equation_0, values = (var_3178_cast_fp16_1, var_3240_cast_fp16))[name = tensor<string, []>("op_3262_cast_fp16")];
+            tensor<string, []> var_3264_equation_0 = const()[name = tensor<string, []>("op_3264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3264_cast_fp16 = einsum(equation = var_3264_equation_0, values = (var_3178_cast_fp16_2, var_3241_cast_fp16))[name = tensor<string, []>("op_3264_cast_fp16")];
+            tensor<string, []> var_3266_equation_0 = const()[name = tensor<string, []>("op_3266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3266_cast_fp16 = einsum(equation = var_3266_equation_0, values = (var_3178_cast_fp16_3, var_3242_cast_fp16))[name = tensor<string, []>("op_3266_cast_fp16")];
+            tensor<string, []> var_3268_equation_0 = const()[name = tensor<string, []>("op_3268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3268_cast_fp16 = einsum(equation = var_3268_equation_0, values = (var_3178_cast_fp16_4, var_3243_cast_fp16))[name = tensor<string, []>("op_3268_cast_fp16")];
+            tensor<string, []> var_3270_equation_0 = const()[name = tensor<string, []>("op_3270_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3270_cast_fp16 = einsum(equation = var_3270_equation_0, values = (var_3178_cast_fp16_5, var_3244_cast_fp16))[name = tensor<string, []>("op_3270_cast_fp16")];
+            tensor<string, []> var_3272_equation_0 = const()[name = tensor<string, []>("op_3272_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3272_cast_fp16 = einsum(equation = var_3272_equation_0, values = (var_3178_cast_fp16_6, var_3245_cast_fp16))[name = tensor<string, []>("op_3272_cast_fp16")];
+            tensor<string, []> var_3274_equation_0 = const()[name = tensor<string, []>("op_3274_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3274_cast_fp16 = einsum(equation = var_3274_equation_0, values = (var_3178_cast_fp16_7, var_3246_cast_fp16))[name = tensor<string, []>("op_3274_cast_fp16")];
+            tensor<string, []> var_3276_equation_0 = const()[name = tensor<string, []>("op_3276_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16 = einsum(equation = var_3276_equation_0, values = (var_3178_cast_fp16_8, var_3247_cast_fp16))[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<string, []> var_3278_equation_0 = const()[name = tensor<string, []>("op_3278_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3278_cast_fp16 = einsum(equation = var_3278_equation_0, values = (var_3178_cast_fp16_9, var_3248_cast_fp16))[name = tensor<string, []>("op_3278_cast_fp16")];
+            tensor<string, []> var_3280_equation_0 = const()[name = tensor<string, []>("op_3280_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3280_cast_fp16 = einsum(equation = var_3280_equation_0, values = (var_3178_cast_fp16_10, var_3249_cast_fp16))[name = tensor<string, []>("op_3280_cast_fp16")];
+            tensor<string, []> var_3282_equation_0 = const()[name = tensor<string, []>("op_3282_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3282_cast_fp16 = einsum(equation = var_3282_equation_0, values = (var_3178_cast_fp16_11, var_3250_cast_fp16))[name = tensor<string, []>("op_3282_cast_fp16")];
+            tensor<string, []> var_3284_equation_0 = const()[name = tensor<string, []>("op_3284_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3284_cast_fp16 = einsum(equation = var_3284_equation_0, values = (var_3178_cast_fp16_12, var_3251_cast_fp16))[name = tensor<string, []>("op_3284_cast_fp16")];
+            tensor<string, []> var_3286_equation_0 = const()[name = tensor<string, []>("op_3286_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3286_cast_fp16 = einsum(equation = var_3286_equation_0, values = (var_3178_cast_fp16_13, var_3252_cast_fp16))[name = tensor<string, []>("op_3286_cast_fp16")];
+            tensor<string, []> var_3288_equation_0 = const()[name = tensor<string, []>("op_3288_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3288_cast_fp16 = einsum(equation = var_3288_equation_0, values = (var_3178_cast_fp16_14, var_3253_cast_fp16))[name = tensor<string, []>("op_3288_cast_fp16")];
+            tensor<string, []> var_3290_equation_0 = const()[name = tensor<string, []>("op_3290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3290_cast_fp16 = einsum(equation = var_3290_equation_0, values = (var_3178_cast_fp16_15, var_3254_cast_fp16))[name = tensor<string, []>("op_3290_cast_fp16")];
+            tensor<string, []> var_3292_equation_0 = const()[name = tensor<string, []>("op_3292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3292_cast_fp16 = einsum(equation = var_3292_equation_0, values = (var_3178_cast_fp16_16, var_3255_cast_fp16))[name = tensor<string, []>("op_3292_cast_fp16")];
+            tensor<string, []> var_3294_equation_0 = const()[name = tensor<string, []>("op_3294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3294_cast_fp16 = einsum(equation = var_3294_equation_0, values = (var_3178_cast_fp16_17, var_3256_cast_fp16))[name = tensor<string, []>("op_3294_cast_fp16")];
+            tensor<string, []> var_3296_equation_0 = const()[name = tensor<string, []>("op_3296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3296_cast_fp16 = einsum(equation = var_3296_equation_0, values = (var_3178_cast_fp16_18, var_3257_cast_fp16))[name = tensor<string, []>("op_3296_cast_fp16")];
+            tensor<string, []> var_3298_equation_0 = const()[name = tensor<string, []>("op_3298_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3298_cast_fp16 = einsum(equation = var_3298_equation_0, values = (var_3178_cast_fp16_19, var_3258_cast_fp16))[name = tensor<string, []>("op_3298_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_115_cast_fp16 = concat(axis = var_3083, interleave = input_115_interleave_0, values = (var_3260_cast_fp16, var_3262_cast_fp16, var_3264_cast_fp16, var_3266_cast_fp16, var_3268_cast_fp16, var_3270_cast_fp16, var_3272_cast_fp16, var_3274_cast_fp16, var_3276_cast_fp16, var_3278_cast_fp16, var_3280_cast_fp16, var_3282_cast_fp16, var_3284_cast_fp16, var_3286_cast_fp16, var_3288_cast_fp16, var_3290_cast_fp16, var_3292_cast_fp16, var_3294_cast_fp16, var_3296_cast_fp16, var_3298_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_3307_pad_type_0 = const()[name = tensor<string, []>("op_3307_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3307_strides_0 = const()[name = tensor<string, []>("op_3307_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3307_pad_0 = const()[name = tensor<string, []>("op_3307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3307_dilations_0 = const()[name = tensor<string, []>("op_3307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3307_groups_0 = const()[name = tensor<string, []>("op_3307_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457017472)))];
+            tensor<fp16, [1280]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460294336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3307_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_3307_dilations_0, groups = var_3307_groups_0, pad = var_3307_pad_0, pad_type = var_3307_pad_type_0, strides = var_3307_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_3307_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_3307_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460296960)))];
+            tensor<fp16, [1280]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460299584)))];
+            tensor<fp16, []> var_3317_to_fp16 = const()[name = tensor<string, []>("op_3317_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_3317_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460302208)))];
+            tensor<fp16, [5120]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473409472)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<string, []> var_3343_pad_type_0 = const()[name = tensor<string, []>("op_3343_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3343_strides_0 = const()[name = tensor<string, []>("op_3343_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3343_pad_0 = const()[name = tensor<string, []>("op_3343_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3343_dilations_0 = const()[name = tensor<string, []>("op_3343_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3343_groups_0 = const()[name = tensor<string, []>("op_3343_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473419776)))];
+            tensor<fp16, [1280]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486527040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3343_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_3343_dilations_0, groups = var_3343_groups_0, pad = var_3343_pad_0, pad_type = var_3343_pad_type_0, strides = var_3343_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("op_3343_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_3343_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_3352 = const()[name = tensor<string, []>("op_3352"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_123_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486529664)))];
+            tensor<fp16, [1280]> input_123_beta_0_to_fp16 = const()[name = tensor<string, []>("input_123_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486532288)))];
+            tensor<fp16, []> var_3368_to_fp16 = const()[name = tensor<string, []>("op_3368_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = input_123_beta_0_to_fp16, epsilon = var_3368_to_fp16, gamma = input_123_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
+            tensor<string, []> q_25_pad_type_0 = const()[name = tensor<string, []>("q_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_25_strides_0 = const()[name = tensor<string, []>("q_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_25_pad_0 = const()[name = tensor<string, []>("q_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_25_dilations_0 = const()[name = tensor<string, []>("q_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_25_groups_0 = const()[name = tensor<string, []>("q_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3403_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3403_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486534912)))];
+            tensor<fp16, [1280]> var_3403_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3403_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(489811776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3403_cast_fp16 = conv(bias = var_3403_bias_0_to_fp16, dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = var_3403_weight_0_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3403_cast_fp16")];
+            tensor<string, []> k_25_pad_type_0 = const()[name = tensor<string, []>("k_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_25_strides_0 = const()[name = tensor<string, []>("k_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_25_pad_0 = const()[name = tensor<string, []>("k_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_25_dilations_0 = const()[name = tensor<string, []>("k_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_25_groups_0 = const()[name = tensor<string, []>("k_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(489814400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_25_cast_fp16 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = blocks_12_attn_key_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
+            tensor<string, []> var_3401_pad_type_0 = const()[name = tensor<string, []>("op_3401_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3401_strides_0 = const()[name = tensor<string, []>("op_3401_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3401_pad_0 = const()[name = tensor<string, []>("op_3401_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3401_dilations_0 = const()[name = tensor<string, []>("op_3401_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3401_groups_0 = const()[name = tensor<string, []>("op_3401_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(493091264)))];
+            tensor<fp16, [1280]> blocks_12_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496368128)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3401_cast_fp16 = conv(bias = blocks_12_attn_value_bias_to_fp16, dilations = var_3401_dilations_0, groups = var_3401_groups_0, pad = var_3401_pad_0, pad_type = var_3401_pad_type_0, strides = var_3401_strides_0, weight = blocks_12_attn_value_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3401_cast_fp16")];
+            tensor<int32, [20]> tile_36 = const()[name = tensor<string, []>("tile_36"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3404_axis_0 = const()[name = tensor<string, []>("op_3404_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_19 = split(axis = var_3404_axis_0, split_sizes = tile_36, x = var_3403_cast_fp16)[name = tensor<string, []>("op_3404_cast_fp16")];
+            tensor<int32, [4]> var_3425_perm_0 = const()[name = tensor<string, []>("op_3425_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3426_axis_0 = const()[name = tensor<string, []>("op_3426_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3425_cast_fp16 = transpose(perm = var_3425_perm_0, x = k_25_cast_fp16)[name = tensor<string, []>("transpose_20")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_19 = split(axis = var_3426_axis_0, split_sizes = tile_37, x = var_3425_cast_fp16)[name = tensor<string, []>("op_3426_cast_fp16")];
+            tensor<int32, [20]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3447_axis_0 = const()[name = tensor<string, []>("op_3447_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_19 = split(axis = var_3447_axis_0, split_sizes = tile_38, x = var_3401_cast_fp16)[name = tensor<string, []>("op_3447_cast_fp16")];
+            tensor<string, []> aw_481_equation_0 = const()[name = tensor<string, []>("aw_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_481_cast_fp16 = einsum(equation = aw_481_equation_0, values = (var_3426_cast_fp16_0, var_3404_cast_fp16_0))[name = tensor<string, []>("aw_481_cast_fp16")];
+            tensor<string, []> aw_483_equation_0 = const()[name = tensor<string, []>("aw_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_483_cast_fp16 = einsum(equation = aw_483_equation_0, values = (var_3426_cast_fp16_1, var_3404_cast_fp16_1))[name = tensor<string, []>("aw_483_cast_fp16")];
+            tensor<string, []> aw_485_equation_0 = const()[name = tensor<string, []>("aw_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_485_cast_fp16 = einsum(equation = aw_485_equation_0, values = (var_3426_cast_fp16_2, var_3404_cast_fp16_2))[name = tensor<string, []>("aw_485_cast_fp16")];
+            tensor<string, []> aw_487_equation_0 = const()[name = tensor<string, []>("aw_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_487_cast_fp16 = einsum(equation = aw_487_equation_0, values = (var_3426_cast_fp16_3, var_3404_cast_fp16_3))[name = tensor<string, []>("aw_487_cast_fp16")];
+            tensor<string, []> aw_489_equation_0 = const()[name = tensor<string, []>("aw_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_489_cast_fp16 = einsum(equation = aw_489_equation_0, values = (var_3426_cast_fp16_4, var_3404_cast_fp16_4))[name = tensor<string, []>("aw_489_cast_fp16")];
+            tensor<string, []> aw_491_equation_0 = const()[name = tensor<string, []>("aw_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_491_cast_fp16 = einsum(equation = aw_491_equation_0, values = (var_3426_cast_fp16_5, var_3404_cast_fp16_5))[name = tensor<string, []>("aw_491_cast_fp16")];
+            tensor<string, []> aw_493_equation_0 = const()[name = tensor<string, []>("aw_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_493_cast_fp16 = einsum(equation = aw_493_equation_0, values = (var_3426_cast_fp16_6, var_3404_cast_fp16_6))[name = tensor<string, []>("aw_493_cast_fp16")];
+            tensor<string, []> aw_495_equation_0 = const()[name = tensor<string, []>("aw_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_495_cast_fp16 = einsum(equation = aw_495_equation_0, values = (var_3426_cast_fp16_7, var_3404_cast_fp16_7))[name = tensor<string, []>("aw_495_cast_fp16")];
+            tensor<string, []> aw_497_equation_0 = const()[name = tensor<string, []>("aw_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_497_cast_fp16 = einsum(equation = aw_497_equation_0, values = (var_3426_cast_fp16_8, var_3404_cast_fp16_8))[name = tensor<string, []>("aw_497_cast_fp16")];
+            tensor<string, []> aw_499_equation_0 = const()[name = tensor<string, []>("aw_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_499_cast_fp16 = einsum(equation = aw_499_equation_0, values = (var_3426_cast_fp16_9, var_3404_cast_fp16_9))[name = tensor<string, []>("aw_499_cast_fp16")];
+            tensor<string, []> aw_501_equation_0 = const()[name = tensor<string, []>("aw_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_501_cast_fp16 = einsum(equation = aw_501_equation_0, values = (var_3426_cast_fp16_10, var_3404_cast_fp16_10))[name = tensor<string, []>("aw_501_cast_fp16")];
+            tensor<string, []> aw_503_equation_0 = const()[name = tensor<string, []>("aw_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_503_cast_fp16 = einsum(equation = aw_503_equation_0, values = (var_3426_cast_fp16_11, var_3404_cast_fp16_11))[name = tensor<string, []>("aw_503_cast_fp16")];
+            tensor<string, []> aw_505_equation_0 = const()[name = tensor<string, []>("aw_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_505_cast_fp16 = einsum(equation = aw_505_equation_0, values = (var_3426_cast_fp16_12, var_3404_cast_fp16_12))[name = tensor<string, []>("aw_505_cast_fp16")];
+            tensor<string, []> aw_507_equation_0 = const()[name = tensor<string, []>("aw_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_507_cast_fp16 = einsum(equation = aw_507_equation_0, values = (var_3426_cast_fp16_13, var_3404_cast_fp16_13))[name = tensor<string, []>("aw_507_cast_fp16")];
+            tensor<string, []> aw_509_equation_0 = const()[name = tensor<string, []>("aw_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_509_cast_fp16 = einsum(equation = aw_509_equation_0, values = (var_3426_cast_fp16_14, var_3404_cast_fp16_14))[name = tensor<string, []>("aw_509_cast_fp16")];
+            tensor<string, []> aw_511_equation_0 = const()[name = tensor<string, []>("aw_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_511_cast_fp16 = einsum(equation = aw_511_equation_0, values = (var_3426_cast_fp16_15, var_3404_cast_fp16_15))[name = tensor<string, []>("aw_511_cast_fp16")];
+            tensor<string, []> aw_513_equation_0 = const()[name = tensor<string, []>("aw_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_513_cast_fp16 = einsum(equation = aw_513_equation_0, values = (var_3426_cast_fp16_16, var_3404_cast_fp16_16))[name = tensor<string, []>("aw_513_cast_fp16")];
+            tensor<string, []> aw_515_equation_0 = const()[name = tensor<string, []>("aw_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_515_cast_fp16 = einsum(equation = aw_515_equation_0, values = (var_3426_cast_fp16_17, var_3404_cast_fp16_17))[name = tensor<string, []>("aw_515_cast_fp16")];
+            tensor<string, []> aw_517_equation_0 = const()[name = tensor<string, []>("aw_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_517_cast_fp16 = einsum(equation = aw_517_equation_0, values = (var_3426_cast_fp16_18, var_3404_cast_fp16_18))[name = tensor<string, []>("aw_517_cast_fp16")];
+            tensor<string, []> aw_519_equation_0 = const()[name = tensor<string, []>("aw_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_519_cast_fp16 = einsum(equation = aw_519_equation_0, values = (var_3426_cast_fp16_19, var_3404_cast_fp16_19))[name = tensor<string, []>("aw_519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3508_cast_fp16 = softmax(axis = var_3352, x = aw_481_cast_fp16)[name = tensor<string, []>("op_3508_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3509_cast_fp16 = softmax(axis = var_3352, x = aw_483_cast_fp16)[name = tensor<string, []>("op_3509_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3510_cast_fp16 = softmax(axis = var_3352, x = aw_485_cast_fp16)[name = tensor<string, []>("op_3510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3511_cast_fp16 = softmax(axis = var_3352, x = aw_487_cast_fp16)[name = tensor<string, []>("op_3511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3512_cast_fp16 = softmax(axis = var_3352, x = aw_489_cast_fp16)[name = tensor<string, []>("op_3512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3513_cast_fp16 = softmax(axis = var_3352, x = aw_491_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3514_cast_fp16 = softmax(axis = var_3352, x = aw_493_cast_fp16)[name = tensor<string, []>("op_3514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3515_cast_fp16 = softmax(axis = var_3352, x = aw_495_cast_fp16)[name = tensor<string, []>("op_3515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3516_cast_fp16 = softmax(axis = var_3352, x = aw_497_cast_fp16)[name = tensor<string, []>("op_3516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3517_cast_fp16 = softmax(axis = var_3352, x = aw_499_cast_fp16)[name = tensor<string, []>("op_3517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3518_cast_fp16 = softmax(axis = var_3352, x = aw_501_cast_fp16)[name = tensor<string, []>("op_3518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3519_cast_fp16 = softmax(axis = var_3352, x = aw_503_cast_fp16)[name = tensor<string, []>("op_3519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3520_cast_fp16 = softmax(axis = var_3352, x = aw_505_cast_fp16)[name = tensor<string, []>("op_3520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3521_cast_fp16 = softmax(axis = var_3352, x = aw_507_cast_fp16)[name = tensor<string, []>("op_3521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3522_cast_fp16 = softmax(axis = var_3352, x = aw_509_cast_fp16)[name = tensor<string, []>("op_3522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3523_cast_fp16 = softmax(axis = var_3352, x = aw_511_cast_fp16)[name = tensor<string, []>("op_3523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3524_cast_fp16 = softmax(axis = var_3352, x = aw_513_cast_fp16)[name = tensor<string, []>("op_3524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3525_cast_fp16 = softmax(axis = var_3352, x = aw_515_cast_fp16)[name = tensor<string, []>("op_3525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3526_cast_fp16 = softmax(axis = var_3352, x = aw_517_cast_fp16)[name = tensor<string, []>("op_3526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3527_cast_fp16 = softmax(axis = var_3352, x = aw_519_cast_fp16)[name = tensor<string, []>("op_3527_cast_fp16")];
+            tensor<string, []> var_3529_equation_0 = const()[name = tensor<string, []>("op_3529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3529_cast_fp16 = einsum(equation = var_3529_equation_0, values = (var_3447_cast_fp16_0, var_3508_cast_fp16))[name = tensor<string, []>("op_3529_cast_fp16")];
+            tensor<string, []> var_3531_equation_0 = const()[name = tensor<string, []>("op_3531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3531_cast_fp16 = einsum(equation = var_3531_equation_0, values = (var_3447_cast_fp16_1, var_3509_cast_fp16))[name = tensor<string, []>("op_3531_cast_fp16")];
+            tensor<string, []> var_3533_equation_0 = const()[name = tensor<string, []>("op_3533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3533_cast_fp16 = einsum(equation = var_3533_equation_0, values = (var_3447_cast_fp16_2, var_3510_cast_fp16))[name = tensor<string, []>("op_3533_cast_fp16")];
+            tensor<string, []> var_3535_equation_0 = const()[name = tensor<string, []>("op_3535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3535_cast_fp16 = einsum(equation = var_3535_equation_0, values = (var_3447_cast_fp16_3, var_3511_cast_fp16))[name = tensor<string, []>("op_3535_cast_fp16")];
+            tensor<string, []> var_3537_equation_0 = const()[name = tensor<string, []>("op_3537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3537_cast_fp16 = einsum(equation = var_3537_equation_0, values = (var_3447_cast_fp16_4, var_3512_cast_fp16))[name = tensor<string, []>("op_3537_cast_fp16")];
+            tensor<string, []> var_3539_equation_0 = const()[name = tensor<string, []>("op_3539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3539_cast_fp16 = einsum(equation = var_3539_equation_0, values = (var_3447_cast_fp16_5, var_3513_cast_fp16))[name = tensor<string, []>("op_3539_cast_fp16")];
+            tensor<string, []> var_3541_equation_0 = const()[name = tensor<string, []>("op_3541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3541_cast_fp16 = einsum(equation = var_3541_equation_0, values = (var_3447_cast_fp16_6, var_3514_cast_fp16))[name = tensor<string, []>("op_3541_cast_fp16")];
+            tensor<string, []> var_3543_equation_0 = const()[name = tensor<string, []>("op_3543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3543_cast_fp16 = einsum(equation = var_3543_equation_0, values = (var_3447_cast_fp16_7, var_3515_cast_fp16))[name = tensor<string, []>("op_3543_cast_fp16")];
+            tensor<string, []> var_3545_equation_0 = const()[name = tensor<string, []>("op_3545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3545_cast_fp16 = einsum(equation = var_3545_equation_0, values = (var_3447_cast_fp16_8, var_3516_cast_fp16))[name = tensor<string, []>("op_3545_cast_fp16")];
+            tensor<string, []> var_3547_equation_0 = const()[name = tensor<string, []>("op_3547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3547_cast_fp16 = einsum(equation = var_3547_equation_0, values = (var_3447_cast_fp16_9, var_3517_cast_fp16))[name = tensor<string, []>("op_3547_cast_fp16")];
+            tensor<string, []> var_3549_equation_0 = const()[name = tensor<string, []>("op_3549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3549_cast_fp16 = einsum(equation = var_3549_equation_0, values = (var_3447_cast_fp16_10, var_3518_cast_fp16))[name = tensor<string, []>("op_3549_cast_fp16")];
+            tensor<string, []> var_3551_equation_0 = const()[name = tensor<string, []>("op_3551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3551_cast_fp16 = einsum(equation = var_3551_equation_0, values = (var_3447_cast_fp16_11, var_3519_cast_fp16))[name = tensor<string, []>("op_3551_cast_fp16")];
+            tensor<string, []> var_3553_equation_0 = const()[name = tensor<string, []>("op_3553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3553_cast_fp16 = einsum(equation = var_3553_equation_0, values = (var_3447_cast_fp16_12, var_3520_cast_fp16))[name = tensor<string, []>("op_3553_cast_fp16")];
+            tensor<string, []> var_3555_equation_0 = const()[name = tensor<string, []>("op_3555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3555_cast_fp16 = einsum(equation = var_3555_equation_0, values = (var_3447_cast_fp16_13, var_3521_cast_fp16))[name = tensor<string, []>("op_3555_cast_fp16")];
+            tensor<string, []> var_3557_equation_0 = const()[name = tensor<string, []>("op_3557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3557_cast_fp16 = einsum(equation = var_3557_equation_0, values = (var_3447_cast_fp16_14, var_3522_cast_fp16))[name = tensor<string, []>("op_3557_cast_fp16")];
+            tensor<string, []> var_3559_equation_0 = const()[name = tensor<string, []>("op_3559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3559_cast_fp16 = einsum(equation = var_3559_equation_0, values = (var_3447_cast_fp16_15, var_3523_cast_fp16))[name = tensor<string, []>("op_3559_cast_fp16")];
+            tensor<string, []> var_3561_equation_0 = const()[name = tensor<string, []>("op_3561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3561_cast_fp16 = einsum(equation = var_3561_equation_0, values = (var_3447_cast_fp16_16, var_3524_cast_fp16))[name = tensor<string, []>("op_3561_cast_fp16")];
+            tensor<string, []> var_3563_equation_0 = const()[name = tensor<string, []>("op_3563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3563_cast_fp16 = einsum(equation = var_3563_equation_0, values = (var_3447_cast_fp16_17, var_3525_cast_fp16))[name = tensor<string, []>("op_3563_cast_fp16")];
+            tensor<string, []> var_3565_equation_0 = const()[name = tensor<string, []>("op_3565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3565_cast_fp16 = einsum(equation = var_3565_equation_0, values = (var_3447_cast_fp16_18, var_3526_cast_fp16))[name = tensor<string, []>("op_3565_cast_fp16")];
+            tensor<string, []> var_3567_equation_0 = const()[name = tensor<string, []>("op_3567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3567_cast_fp16 = einsum(equation = var_3567_equation_0, values = (var_3447_cast_fp16_19, var_3527_cast_fp16))[name = tensor<string, []>("op_3567_cast_fp16")];
+            tensor<bool, []> input_125_interleave_0 = const()[name = tensor<string, []>("input_125_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_125_cast_fp16 = concat(axis = var_3352, interleave = input_125_interleave_0, values = (var_3529_cast_fp16, var_3531_cast_fp16, var_3533_cast_fp16, var_3535_cast_fp16, var_3537_cast_fp16, var_3539_cast_fp16, var_3541_cast_fp16, var_3543_cast_fp16, var_3545_cast_fp16, var_3547_cast_fp16, var_3549_cast_fp16, var_3551_cast_fp16, var_3553_cast_fp16, var_3555_cast_fp16, var_3557_cast_fp16, var_3559_cast_fp16, var_3561_cast_fp16, var_3563_cast_fp16, var_3565_cast_fp16, var_3567_cast_fp16))[name = tensor<string, []>("input_125_cast_fp16")];
+            tensor<string, []> var_3576_pad_type_0 = const()[name = tensor<string, []>("op_3576_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3576_strides_0 = const()[name = tensor<string, []>("op_3576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3576_pad_0 = const()[name = tensor<string, []>("op_3576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3576_dilations_0 = const()[name = tensor<string, []>("op_3576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3576_groups_0 = const()[name = tensor<string, []>("op_3576_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496370752)))];
+            tensor<fp16, [1280]> blocks_12_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499647616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3576_cast_fp16 = conv(bias = blocks_12_attn_out_bias_to_fp16, dilations = var_3576_dilations_0, groups = var_3576_groups_0, pad = var_3576_pad_0, pad_type = var_3576_pad_type_0, strides = var_3576_strides_0, weight = blocks_12_attn_out_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = var_3576_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_127_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499650240)))];
+            tensor<fp16, [1280]> input_127_beta_0_to_fp16 = const()[name = tensor<string, []>("input_127_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499652864)))];
+            tensor<fp16, []> var_3586_to_fp16 = const()[name = tensor<string, []>("op_3586_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = input_127_beta_0_to_fp16, epsilon = var_3586_to_fp16, gamma = input_127_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
+            tensor<string, []> input_129_pad_type_0 = const()[name = tensor<string, []>("input_129_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_129_strides_0 = const()[name = tensor<string, []>("input_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_129_pad_0 = const()[name = tensor<string, []>("input_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_129_dilations_0 = const()[name = tensor<string, []>("input_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_129_groups_0 = const()[name = tensor<string, []>("input_129_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_12_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499655488)))];
+            tensor<fp16, [5120]> blocks_12_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(512762752)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_129_cast_fp16 = conv(bias = blocks_12_mlp_0_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = blocks_12_mlp_0_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
+            tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<string, []> var_3612_pad_type_0 = const()[name = tensor<string, []>("op_3612_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3612_strides_0 = const()[name = tensor<string, []>("op_3612_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3612_pad_0 = const()[name = tensor<string, []>("op_3612_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3612_dilations_0 = const()[name = tensor<string, []>("op_3612_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3612_groups_0 = const()[name = tensor<string, []>("op_3612_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_12_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(512773056)))];
+            tensor<fp16, [1280]> blocks_12_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525880320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3612_cast_fp16 = conv(bias = blocks_12_mlp_2_bias_to_fp16, dilations = var_3612_dilations_0, groups = var_3612_groups_0, pad = var_3612_pad_0, pad_type = var_3612_pad_type_0, strides = var_3612_strides_0, weight = blocks_12_mlp_2_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("op_3612_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = var_3612_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, []> var_3621 = const()[name = tensor<string, []>("op_3621"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = tensor<string, []>("input_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_133_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_133_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525882944)))];
+            tensor<fp16, [1280]> input_133_beta_0_to_fp16 = const()[name = tensor<string, []>("input_133_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525885568)))];
+            tensor<fp16, []> var_3637_to_fp16 = const()[name = tensor<string, []>("op_3637_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = input_133_beta_0_to_fp16, epsilon = var_3637_to_fp16, gamma = input_133_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<string, []> q_27_pad_type_0 = const()[name = tensor<string, []>("q_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_27_strides_0 = const()[name = tensor<string, []>("q_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_27_pad_0 = const()[name = tensor<string, []>("q_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_27_dilations_0 = const()[name = tensor<string, []>("q_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_27_groups_0 = const()[name = tensor<string, []>("q_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3672_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3672_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525888192)))];
+            tensor<fp16, [1280]> var_3672_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3672_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529165056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3672_cast_fp16 = conv(bias = var_3672_bias_0_to_fp16, dilations = q_27_dilations_0, groups = q_27_groups_0, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = q_27_strides_0, weight = var_3672_weight_0_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3672_cast_fp16")];
+            tensor<string, []> k_27_pad_type_0 = const()[name = tensor<string, []>("k_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_27_strides_0 = const()[name = tensor<string, []>("k_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_27_pad_0 = const()[name = tensor<string, []>("k_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_27_dilations_0 = const()[name = tensor<string, []>("k_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_27_groups_0 = const()[name = tensor<string, []>("k_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529167680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_27_cast_fp16 = conv(dilations = k_27_dilations_0, groups = k_27_groups_0, pad = k_27_pad_0, pad_type = k_27_pad_type_0, strides = k_27_strides_0, weight = blocks_13_attn_key_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("k_27_cast_fp16")];
+            tensor<string, []> var_3670_pad_type_0 = const()[name = tensor<string, []>("op_3670_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3670_strides_0 = const()[name = tensor<string, []>("op_3670_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3670_pad_0 = const()[name = tensor<string, []>("op_3670_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3670_dilations_0 = const()[name = tensor<string, []>("op_3670_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3670_groups_0 = const()[name = tensor<string, []>("op_3670_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(532444544)))];
+            tensor<fp16, [1280]> blocks_13_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(535721408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3670_cast_fp16 = conv(bias = blocks_13_attn_value_bias_to_fp16, dilations = var_3670_dilations_0, groups = var_3670_groups_0, pad = var_3670_pad_0, pad_type = var_3670_pad_type_0, strides = var_3670_strides_0, weight = blocks_13_attn_value_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3670_cast_fp16")];
+            tensor<int32, [20]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3673_axis_0 = const()[name = tensor<string, []>("op_3673_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_19 = split(axis = var_3673_axis_0, split_sizes = tile_39, x = var_3672_cast_fp16)[name = tensor<string, []>("op_3673_cast_fp16")];
+            tensor<int32, [4]> var_3694_perm_0 = const()[name = tensor<string, []>("op_3694_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_40 = const()[name = tensor<string, []>("tile_40"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3695_axis_0 = const()[name = tensor<string, []>("op_3695_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_19")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_19 = split(axis = var_3695_axis_0, split_sizes = tile_40, x = var_3694_cast_fp16)[name = tensor<string, []>("op_3695_cast_fp16")];
+            tensor<int32, [20]> tile_41 = const()[name = tensor<string, []>("tile_41"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3716_axis_0 = const()[name = tensor<string, []>("op_3716_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_19 = split(axis = var_3716_axis_0, split_sizes = tile_41, x = var_3670_cast_fp16)[name = tensor<string, []>("op_3716_cast_fp16")];
+            tensor<string, []> aw_521_equation_0 = const()[name = tensor<string, []>("aw_521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_521_cast_fp16 = einsum(equation = aw_521_equation_0, values = (var_3695_cast_fp16_0, var_3673_cast_fp16_0))[name = tensor<string, []>("aw_521_cast_fp16")];
+            tensor<string, []> aw_523_equation_0 = const()[name = tensor<string, []>("aw_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_523_cast_fp16 = einsum(equation = aw_523_equation_0, values = (var_3695_cast_fp16_1, var_3673_cast_fp16_1))[name = tensor<string, []>("aw_523_cast_fp16")];
+            tensor<string, []> aw_525_equation_0 = const()[name = tensor<string, []>("aw_525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_525_cast_fp16 = einsum(equation = aw_525_equation_0, values = (var_3695_cast_fp16_2, var_3673_cast_fp16_2))[name = tensor<string, []>("aw_525_cast_fp16")];
+            tensor<string, []> aw_527_equation_0 = const()[name = tensor<string, []>("aw_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_527_cast_fp16 = einsum(equation = aw_527_equation_0, values = (var_3695_cast_fp16_3, var_3673_cast_fp16_3))[name = tensor<string, []>("aw_527_cast_fp16")];
+            tensor<string, []> aw_529_equation_0 = const()[name = tensor<string, []>("aw_529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_529_cast_fp16 = einsum(equation = aw_529_equation_0, values = (var_3695_cast_fp16_4, var_3673_cast_fp16_4))[name = tensor<string, []>("aw_529_cast_fp16")];
+            tensor<string, []> aw_531_equation_0 = const()[name = tensor<string, []>("aw_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_531_cast_fp16 = einsum(equation = aw_531_equation_0, values = (var_3695_cast_fp16_5, var_3673_cast_fp16_5))[name = tensor<string, []>("aw_531_cast_fp16")];
+            tensor<string, []> aw_533_equation_0 = const()[name = tensor<string, []>("aw_533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_533_cast_fp16 = einsum(equation = aw_533_equation_0, values = (var_3695_cast_fp16_6, var_3673_cast_fp16_6))[name = tensor<string, []>("aw_533_cast_fp16")];
+            tensor<string, []> aw_535_equation_0 = const()[name = tensor<string, []>("aw_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_535_cast_fp16 = einsum(equation = aw_535_equation_0, values = (var_3695_cast_fp16_7, var_3673_cast_fp16_7))[name = tensor<string, []>("aw_535_cast_fp16")];
+            tensor<string, []> aw_537_equation_0 = const()[name = tensor<string, []>("aw_537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_537_cast_fp16 = einsum(equation = aw_537_equation_0, values = (var_3695_cast_fp16_8, var_3673_cast_fp16_8))[name = tensor<string, []>("aw_537_cast_fp16")];
+            tensor<string, []> aw_539_equation_0 = const()[name = tensor<string, []>("aw_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_539_cast_fp16 = einsum(equation = aw_539_equation_0, values = (var_3695_cast_fp16_9, var_3673_cast_fp16_9))[name = tensor<string, []>("aw_539_cast_fp16")];
+            tensor<string, []> aw_541_equation_0 = const()[name = tensor<string, []>("aw_541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_541_cast_fp16 = einsum(equation = aw_541_equation_0, values = (var_3695_cast_fp16_10, var_3673_cast_fp16_10))[name = tensor<string, []>("aw_541_cast_fp16")];
+            tensor<string, []> aw_543_equation_0 = const()[name = tensor<string, []>("aw_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_543_cast_fp16 = einsum(equation = aw_543_equation_0, values = (var_3695_cast_fp16_11, var_3673_cast_fp16_11))[name = tensor<string, []>("aw_543_cast_fp16")];
+            tensor<string, []> aw_545_equation_0 = const()[name = tensor<string, []>("aw_545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_545_cast_fp16 = einsum(equation = aw_545_equation_0, values = (var_3695_cast_fp16_12, var_3673_cast_fp16_12))[name = tensor<string, []>("aw_545_cast_fp16")];
+            tensor<string, []> aw_547_equation_0 = const()[name = tensor<string, []>("aw_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_547_cast_fp16 = einsum(equation = aw_547_equation_0, values = (var_3695_cast_fp16_13, var_3673_cast_fp16_13))[name = tensor<string, []>("aw_547_cast_fp16")];
+            tensor<string, []> aw_549_equation_0 = const()[name = tensor<string, []>("aw_549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_549_cast_fp16 = einsum(equation = aw_549_equation_0, values = (var_3695_cast_fp16_14, var_3673_cast_fp16_14))[name = tensor<string, []>("aw_549_cast_fp16")];
+            tensor<string, []> aw_551_equation_0 = const()[name = tensor<string, []>("aw_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_551_cast_fp16 = einsum(equation = aw_551_equation_0, values = (var_3695_cast_fp16_15, var_3673_cast_fp16_15))[name = tensor<string, []>("aw_551_cast_fp16")];
+            tensor<string, []> aw_553_equation_0 = const()[name = tensor<string, []>("aw_553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_553_cast_fp16 = einsum(equation = aw_553_equation_0, values = (var_3695_cast_fp16_16, var_3673_cast_fp16_16))[name = tensor<string, []>("aw_553_cast_fp16")];
+            tensor<string, []> aw_555_equation_0 = const()[name = tensor<string, []>("aw_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_555_cast_fp16 = einsum(equation = aw_555_equation_0, values = (var_3695_cast_fp16_17, var_3673_cast_fp16_17))[name = tensor<string, []>("aw_555_cast_fp16")];
+            tensor<string, []> aw_557_equation_0 = const()[name = tensor<string, []>("aw_557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_557_cast_fp16 = einsum(equation = aw_557_equation_0, values = (var_3695_cast_fp16_18, var_3673_cast_fp16_18))[name = tensor<string, []>("aw_557_cast_fp16")];
+            tensor<string, []> aw_559_equation_0 = const()[name = tensor<string, []>("aw_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_559_cast_fp16 = einsum(equation = aw_559_equation_0, values = (var_3695_cast_fp16_19, var_3673_cast_fp16_19))[name = tensor<string, []>("aw_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3777_cast_fp16 = softmax(axis = var_3621, x = aw_521_cast_fp16)[name = tensor<string, []>("op_3777_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3778_cast_fp16 = softmax(axis = var_3621, x = aw_523_cast_fp16)[name = tensor<string, []>("op_3778_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3779_cast_fp16 = softmax(axis = var_3621, x = aw_525_cast_fp16)[name = tensor<string, []>("op_3779_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3780_cast_fp16 = softmax(axis = var_3621, x = aw_527_cast_fp16)[name = tensor<string, []>("op_3780_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3781_cast_fp16 = softmax(axis = var_3621, x = aw_529_cast_fp16)[name = tensor<string, []>("op_3781_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3782_cast_fp16 = softmax(axis = var_3621, x = aw_531_cast_fp16)[name = tensor<string, []>("op_3782_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3783_cast_fp16 = softmax(axis = var_3621, x = aw_533_cast_fp16)[name = tensor<string, []>("op_3783_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3784_cast_fp16 = softmax(axis = var_3621, x = aw_535_cast_fp16)[name = tensor<string, []>("op_3784_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3785_cast_fp16 = softmax(axis = var_3621, x = aw_537_cast_fp16)[name = tensor<string, []>("op_3785_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3786_cast_fp16 = softmax(axis = var_3621, x = aw_539_cast_fp16)[name = tensor<string, []>("op_3786_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3787_cast_fp16 = softmax(axis = var_3621, x = aw_541_cast_fp16)[name = tensor<string, []>("op_3787_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3788_cast_fp16 = softmax(axis = var_3621, x = aw_543_cast_fp16)[name = tensor<string, []>("op_3788_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3789_cast_fp16 = softmax(axis = var_3621, x = aw_545_cast_fp16)[name = tensor<string, []>("op_3789_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3790_cast_fp16 = softmax(axis = var_3621, x = aw_547_cast_fp16)[name = tensor<string, []>("op_3790_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3791_cast_fp16 = softmax(axis = var_3621, x = aw_549_cast_fp16)[name = tensor<string, []>("op_3791_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3792_cast_fp16 = softmax(axis = var_3621, x = aw_551_cast_fp16)[name = tensor<string, []>("op_3792_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3793_cast_fp16 = softmax(axis = var_3621, x = aw_553_cast_fp16)[name = tensor<string, []>("op_3793_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3794_cast_fp16 = softmax(axis = var_3621, x = aw_555_cast_fp16)[name = tensor<string, []>("op_3794_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3795_cast_fp16 = softmax(axis = var_3621, x = aw_557_cast_fp16)[name = tensor<string, []>("op_3795_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3796_cast_fp16 = softmax(axis = var_3621, x = aw_559_cast_fp16)[name = tensor<string, []>("op_3796_cast_fp16")];
+            tensor<string, []> var_3798_equation_0 = const()[name = tensor<string, []>("op_3798_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3798_cast_fp16 = einsum(equation = var_3798_equation_0, values = (var_3716_cast_fp16_0, var_3777_cast_fp16))[name = tensor<string, []>("op_3798_cast_fp16")];
+            tensor<string, []> var_3800_equation_0 = const()[name = tensor<string, []>("op_3800_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3800_cast_fp16 = einsum(equation = var_3800_equation_0, values = (var_3716_cast_fp16_1, var_3778_cast_fp16))[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<string, []> var_3802_equation_0 = const()[name = tensor<string, []>("op_3802_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3802_cast_fp16 = einsum(equation = var_3802_equation_0, values = (var_3716_cast_fp16_2, var_3779_cast_fp16))[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<string, []> var_3804_equation_0 = const()[name = tensor<string, []>("op_3804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3804_cast_fp16 = einsum(equation = var_3804_equation_0, values = (var_3716_cast_fp16_3, var_3780_cast_fp16))[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<string, []> var_3806_equation_0 = const()[name = tensor<string, []>("op_3806_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3806_cast_fp16 = einsum(equation = var_3806_equation_0, values = (var_3716_cast_fp16_4, var_3781_cast_fp16))[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<string, []> var_3808_equation_0 = const()[name = tensor<string, []>("op_3808_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3808_cast_fp16 = einsum(equation = var_3808_equation_0, values = (var_3716_cast_fp16_5, var_3782_cast_fp16))[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<string, []> var_3810_equation_0 = const()[name = tensor<string, []>("op_3810_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3810_cast_fp16 = einsum(equation = var_3810_equation_0, values = (var_3716_cast_fp16_6, var_3783_cast_fp16))[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<string, []> var_3812_equation_0 = const()[name = tensor<string, []>("op_3812_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3812_cast_fp16 = einsum(equation = var_3812_equation_0, values = (var_3716_cast_fp16_7, var_3784_cast_fp16))[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<string, []> var_3814_equation_0 = const()[name = tensor<string, []>("op_3814_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3814_cast_fp16 = einsum(equation = var_3814_equation_0, values = (var_3716_cast_fp16_8, var_3785_cast_fp16))[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3716_cast_fp16_9, var_3786_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3716_cast_fp16_10, var_3787_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3716_cast_fp16_11, var_3788_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3716_cast_fp16_12, var_3789_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3716_cast_fp16_13, var_3790_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3716_cast_fp16_14, var_3791_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3716_cast_fp16_15, var_3792_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3716_cast_fp16_16, var_3793_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3716_cast_fp16_17, var_3794_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3716_cast_fp16_18, var_3795_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3716_cast_fp16_19, var_3796_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<bool, []> input_135_interleave_0 = const()[name = tensor<string, []>("input_135_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_135_cast_fp16 = concat(axis = var_3621, interleave = input_135_interleave_0, values = (var_3798_cast_fp16, var_3800_cast_fp16, var_3802_cast_fp16, var_3804_cast_fp16, var_3806_cast_fp16, var_3808_cast_fp16, var_3810_cast_fp16, var_3812_cast_fp16, var_3814_cast_fp16, var_3816_cast_fp16, var_3818_cast_fp16, var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16, var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16, var_3836_cast_fp16))[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<string, []> var_3845_pad_type_0 = const()[name = tensor<string, []>("op_3845_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3845_strides_0 = const()[name = tensor<string, []>("op_3845_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3845_pad_0 = const()[name = tensor<string, []>("op_3845_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3845_dilations_0 = const()[name = tensor<string, []>("op_3845_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3845_groups_0 = const()[name = tensor<string, []>("op_3845_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(535724032)))];
+            tensor<fp16, [1280]> blocks_13_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539000896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3845_cast_fp16 = conv(bias = blocks_13_attn_out_bias_to_fp16, dilations = var_3845_dilations_0, groups = var_3845_groups_0, pad = var_3845_pad_0, pad_type = var_3845_pad_type_0, strides = var_3845_strides_0, weight = blocks_13_attn_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("op_3845_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = var_3845_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_137_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_137_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539003520)))];
+            tensor<fp16, [1280]> input_137_beta_0_to_fp16 = const()[name = tensor<string, []>("input_137_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539006144)))];
+            tensor<fp16, []> var_3855_to_fp16 = const()[name = tensor<string, []>("op_3855_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_3855_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_13_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539008768)))];
+            tensor<fp16, [5120]> blocks_13_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552116032)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_139_cast_fp16 = conv(bias = blocks_13_mlp_0_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = blocks_13_mlp_0_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<string, []> var_3881_pad_type_0 = const()[name = tensor<string, []>("op_3881_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3881_strides_0 = const()[name = tensor<string, []>("op_3881_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3881_pad_0 = const()[name = tensor<string, []>("op_3881_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3881_dilations_0 = const()[name = tensor<string, []>("op_3881_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3881_groups_0 = const()[name = tensor<string, []>("op_3881_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_13_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552126336)))];
+            tensor<fp16, [1280]> blocks_13_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565233600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3881_cast_fp16 = conv(bias = blocks_13_mlp_2_bias_to_fp16, dilations = var_3881_dilations_0, groups = var_3881_groups_0, pad = var_3881_pad_0, pad_type = var_3881_pad_type_0, strides = var_3881_strides_0, weight = blocks_13_mlp_2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("op_3881_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_3881_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, []> var_3890 = const()[name = tensor<string, []>("op_3890"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_143_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_143_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565236224)))];
+            tensor<fp16, [1280]> input_143_beta_0_to_fp16 = const()[name = tensor<string, []>("input_143_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565238848)))];
+            tensor<fp16, []> var_3906_to_fp16 = const()[name = tensor<string, []>("op_3906_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = input_143_beta_0_to_fp16, epsilon = var_3906_to_fp16, gamma = input_143_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<string, []> q_29_pad_type_0 = const()[name = tensor<string, []>("q_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_29_strides_0 = const()[name = tensor<string, []>("q_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_29_pad_0 = const()[name = tensor<string, []>("q_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_29_dilations_0 = const()[name = tensor<string, []>("q_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_29_groups_0 = const()[name = tensor<string, []>("q_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3941_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3941_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565241472)))];
+            tensor<fp16, [1280]> var_3941_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3941_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568518336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3941_cast_fp16 = conv(bias = var_3941_bias_0_to_fp16, dilations = q_29_dilations_0, groups = q_29_groups_0, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = q_29_strides_0, weight = var_3941_weight_0_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3941_cast_fp16")];
+            tensor<string, []> k_29_pad_type_0 = const()[name = tensor<string, []>("k_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_29_strides_0 = const()[name = tensor<string, []>("k_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_29_pad_0 = const()[name = tensor<string, []>("k_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_29_dilations_0 = const()[name = tensor<string, []>("k_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_29_groups_0 = const()[name = tensor<string, []>("k_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568520960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_29_cast_fp16 = conv(dilations = k_29_dilations_0, groups = k_29_groups_0, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = k_29_strides_0, weight = blocks_14_attn_key_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
+            tensor<string, []> var_3939_pad_type_0 = const()[name = tensor<string, []>("op_3939_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3939_strides_0 = const()[name = tensor<string, []>("op_3939_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3939_pad_0 = const()[name = tensor<string, []>("op_3939_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3939_dilations_0 = const()[name = tensor<string, []>("op_3939_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3939_groups_0 = const()[name = tensor<string, []>("op_3939_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(571797824)))];
+            tensor<fp16, [1280]> blocks_14_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575074688)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3939_cast_fp16 = conv(bias = blocks_14_attn_value_bias_to_fp16, dilations = var_3939_dilations_0, groups = var_3939_groups_0, pad = var_3939_pad_0, pad_type = var_3939_pad_type_0, strides = var_3939_strides_0, weight = blocks_14_attn_value_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3939_cast_fp16")];
+            tensor<int32, [20]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3942_axis_0 = const()[name = tensor<string, []>("op_3942_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_19 = split(axis = var_3942_axis_0, split_sizes = tile_42, x = var_3941_cast_fp16)[name = tensor<string, []>("op_3942_cast_fp16")];
+            tensor<int32, [4]> var_3963_perm_0 = const()[name = tensor<string, []>("op_3963_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3964_axis_0 = const()[name = tensor<string, []>("op_3964_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3963_cast_fp16 = transpose(perm = var_3963_perm_0, x = k_29_cast_fp16)[name = tensor<string, []>("transpose_18")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_19 = split(axis = var_3964_axis_0, split_sizes = tile_43, x = var_3963_cast_fp16)[name = tensor<string, []>("op_3964_cast_fp16")];
+            tensor<int32, [20]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3985_axis_0 = const()[name = tensor<string, []>("op_3985_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_19 = split(axis = var_3985_axis_0, split_sizes = tile_44, x = var_3939_cast_fp16)[name = tensor<string, []>("op_3985_cast_fp16")];
+            tensor<string, []> aw_561_equation_0 = const()[name = tensor<string, []>("aw_561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_561_cast_fp16 = einsum(equation = aw_561_equation_0, values = (var_3964_cast_fp16_0, var_3942_cast_fp16_0))[name = tensor<string, []>("aw_561_cast_fp16")];
+            tensor<string, []> aw_563_equation_0 = const()[name = tensor<string, []>("aw_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_563_cast_fp16 = einsum(equation = aw_563_equation_0, values = (var_3964_cast_fp16_1, var_3942_cast_fp16_1))[name = tensor<string, []>("aw_563_cast_fp16")];
+            tensor<string, []> aw_565_equation_0 = const()[name = tensor<string, []>("aw_565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_565_cast_fp16 = einsum(equation = aw_565_equation_0, values = (var_3964_cast_fp16_2, var_3942_cast_fp16_2))[name = tensor<string, []>("aw_565_cast_fp16")];
+            tensor<string, []> aw_567_equation_0 = const()[name = tensor<string, []>("aw_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_567_cast_fp16 = einsum(equation = aw_567_equation_0, values = (var_3964_cast_fp16_3, var_3942_cast_fp16_3))[name = tensor<string, []>("aw_567_cast_fp16")];
+            tensor<string, []> aw_569_equation_0 = const()[name = tensor<string, []>("aw_569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_569_cast_fp16 = einsum(equation = aw_569_equation_0, values = (var_3964_cast_fp16_4, var_3942_cast_fp16_4))[name = tensor<string, []>("aw_569_cast_fp16")];
+            tensor<string, []> aw_571_equation_0 = const()[name = tensor<string, []>("aw_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_571_cast_fp16 = einsum(equation = aw_571_equation_0, values = (var_3964_cast_fp16_5, var_3942_cast_fp16_5))[name = tensor<string, []>("aw_571_cast_fp16")];
+            tensor<string, []> aw_573_equation_0 = const()[name = tensor<string, []>("aw_573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_573_cast_fp16 = einsum(equation = aw_573_equation_0, values = (var_3964_cast_fp16_6, var_3942_cast_fp16_6))[name = tensor<string, []>("aw_573_cast_fp16")];
+            tensor<string, []> aw_575_equation_0 = const()[name = tensor<string, []>("aw_575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_575_cast_fp16 = einsum(equation = aw_575_equation_0, values = (var_3964_cast_fp16_7, var_3942_cast_fp16_7))[name = tensor<string, []>("aw_575_cast_fp16")];
+            tensor<string, []> aw_577_equation_0 = const()[name = tensor<string, []>("aw_577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_577_cast_fp16 = einsum(equation = aw_577_equation_0, values = (var_3964_cast_fp16_8, var_3942_cast_fp16_8))[name = tensor<string, []>("aw_577_cast_fp16")];
+            tensor<string, []> aw_579_equation_0 = const()[name = tensor<string, []>("aw_579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_579_cast_fp16 = einsum(equation = aw_579_equation_0, values = (var_3964_cast_fp16_9, var_3942_cast_fp16_9))[name = tensor<string, []>("aw_579_cast_fp16")];
+            tensor<string, []> aw_581_equation_0 = const()[name = tensor<string, []>("aw_581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_581_cast_fp16 = einsum(equation = aw_581_equation_0, values = (var_3964_cast_fp16_10, var_3942_cast_fp16_10))[name = tensor<string, []>("aw_581_cast_fp16")];
+            tensor<string, []> aw_583_equation_0 = const()[name = tensor<string, []>("aw_583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_583_cast_fp16 = einsum(equation = aw_583_equation_0, values = (var_3964_cast_fp16_11, var_3942_cast_fp16_11))[name = tensor<string, []>("aw_583_cast_fp16")];
+            tensor<string, []> aw_585_equation_0 = const()[name = tensor<string, []>("aw_585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_585_cast_fp16 = einsum(equation = aw_585_equation_0, values = (var_3964_cast_fp16_12, var_3942_cast_fp16_12))[name = tensor<string, []>("aw_585_cast_fp16")];
+            tensor<string, []> aw_587_equation_0 = const()[name = tensor<string, []>("aw_587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_587_cast_fp16 = einsum(equation = aw_587_equation_0, values = (var_3964_cast_fp16_13, var_3942_cast_fp16_13))[name = tensor<string, []>("aw_587_cast_fp16")];
+            tensor<string, []> aw_589_equation_0 = const()[name = tensor<string, []>("aw_589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_589_cast_fp16 = einsum(equation = aw_589_equation_0, values = (var_3964_cast_fp16_14, var_3942_cast_fp16_14))[name = tensor<string, []>("aw_589_cast_fp16")];
+            tensor<string, []> aw_591_equation_0 = const()[name = tensor<string, []>("aw_591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_591_cast_fp16 = einsum(equation = aw_591_equation_0, values = (var_3964_cast_fp16_15, var_3942_cast_fp16_15))[name = tensor<string, []>("aw_591_cast_fp16")];
+            tensor<string, []> aw_593_equation_0 = const()[name = tensor<string, []>("aw_593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_593_cast_fp16 = einsum(equation = aw_593_equation_0, values = (var_3964_cast_fp16_16, var_3942_cast_fp16_16))[name = tensor<string, []>("aw_593_cast_fp16")];
+            tensor<string, []> aw_595_equation_0 = const()[name = tensor<string, []>("aw_595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_595_cast_fp16 = einsum(equation = aw_595_equation_0, values = (var_3964_cast_fp16_17, var_3942_cast_fp16_17))[name = tensor<string, []>("aw_595_cast_fp16")];
+            tensor<string, []> aw_597_equation_0 = const()[name = tensor<string, []>("aw_597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_597_cast_fp16 = einsum(equation = aw_597_equation_0, values = (var_3964_cast_fp16_18, var_3942_cast_fp16_18))[name = tensor<string, []>("aw_597_cast_fp16")];
+            tensor<string, []> aw_599_equation_0 = const()[name = tensor<string, []>("aw_599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_599_cast_fp16 = einsum(equation = aw_599_equation_0, values = (var_3964_cast_fp16_19, var_3942_cast_fp16_19))[name = tensor<string, []>("aw_599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4046_cast_fp16 = softmax(axis = var_3890, x = aw_561_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4047_cast_fp16 = softmax(axis = var_3890, x = aw_563_cast_fp16)[name = tensor<string, []>("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4048_cast_fp16 = softmax(axis = var_3890, x = aw_565_cast_fp16)[name = tensor<string, []>("op_4048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4049_cast_fp16 = softmax(axis = var_3890, x = aw_567_cast_fp16)[name = tensor<string, []>("op_4049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4050_cast_fp16 = softmax(axis = var_3890, x = aw_569_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4051_cast_fp16 = softmax(axis = var_3890, x = aw_571_cast_fp16)[name = tensor<string, []>("op_4051_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4052_cast_fp16 = softmax(axis = var_3890, x = aw_573_cast_fp16)[name = tensor<string, []>("op_4052_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4053_cast_fp16 = softmax(axis = var_3890, x = aw_575_cast_fp16)[name = tensor<string, []>("op_4053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4054_cast_fp16 = softmax(axis = var_3890, x = aw_577_cast_fp16)[name = tensor<string, []>("op_4054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4055_cast_fp16 = softmax(axis = var_3890, x = aw_579_cast_fp16)[name = tensor<string, []>("op_4055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4056_cast_fp16 = softmax(axis = var_3890, x = aw_581_cast_fp16)[name = tensor<string, []>("op_4056_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4057_cast_fp16 = softmax(axis = var_3890, x = aw_583_cast_fp16)[name = tensor<string, []>("op_4057_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4058_cast_fp16 = softmax(axis = var_3890, x = aw_585_cast_fp16)[name = tensor<string, []>("op_4058_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4059_cast_fp16 = softmax(axis = var_3890, x = aw_587_cast_fp16)[name = tensor<string, []>("op_4059_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4060_cast_fp16 = softmax(axis = var_3890, x = aw_589_cast_fp16)[name = tensor<string, []>("op_4060_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4061_cast_fp16 = softmax(axis = var_3890, x = aw_591_cast_fp16)[name = tensor<string, []>("op_4061_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4062_cast_fp16 = softmax(axis = var_3890, x = aw_593_cast_fp16)[name = tensor<string, []>("op_4062_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4063_cast_fp16 = softmax(axis = var_3890, x = aw_595_cast_fp16)[name = tensor<string, []>("op_4063_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4064_cast_fp16 = softmax(axis = var_3890, x = aw_597_cast_fp16)[name = tensor<string, []>("op_4064_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4065_cast_fp16 = softmax(axis = var_3890, x = aw_599_cast_fp16)[name = tensor<string, []>("op_4065_cast_fp16")];
+            tensor<string, []> var_4067_equation_0 = const()[name = tensor<string, []>("op_4067_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3985_cast_fp16_0, var_4046_cast_fp16))[name = tensor<string, []>("op_4067_cast_fp16")];
+            tensor<string, []> var_4069_equation_0 = const()[name = tensor<string, []>("op_4069_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4069_cast_fp16 = einsum(equation = var_4069_equation_0, values = (var_3985_cast_fp16_1, var_4047_cast_fp16))[name = tensor<string, []>("op_4069_cast_fp16")];
+            tensor<string, []> var_4071_equation_0 = const()[name = tensor<string, []>("op_4071_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_3985_cast_fp16_2, var_4048_cast_fp16))[name = tensor<string, []>("op_4071_cast_fp16")];
+            tensor<string, []> var_4073_equation_0 = const()[name = tensor<string, []>("op_4073_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4073_cast_fp16 = einsum(equation = var_4073_equation_0, values = (var_3985_cast_fp16_3, var_4049_cast_fp16))[name = tensor<string, []>("op_4073_cast_fp16")];
+            tensor<string, []> var_4075_equation_0 = const()[name = tensor<string, []>("op_4075_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_3985_cast_fp16_4, var_4050_cast_fp16))[name = tensor<string, []>("op_4075_cast_fp16")];
+            tensor<string, []> var_4077_equation_0 = const()[name = tensor<string, []>("op_4077_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4077_cast_fp16 = einsum(equation = var_4077_equation_0, values = (var_3985_cast_fp16_5, var_4051_cast_fp16))[name = tensor<string, []>("op_4077_cast_fp16")];
+            tensor<string, []> var_4079_equation_0 = const()[name = tensor<string, []>("op_4079_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_3985_cast_fp16_6, var_4052_cast_fp16))[name = tensor<string, []>("op_4079_cast_fp16")];
+            tensor<string, []> var_4081_equation_0 = const()[name = tensor<string, []>("op_4081_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4081_cast_fp16 = einsum(equation = var_4081_equation_0, values = (var_3985_cast_fp16_7, var_4053_cast_fp16))[name = tensor<string, []>("op_4081_cast_fp16")];
+            tensor<string, []> var_4083_equation_0 = const()[name = tensor<string, []>("op_4083_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_3985_cast_fp16_8, var_4054_cast_fp16))[name = tensor<string, []>("op_4083_cast_fp16")];
+            tensor<string, []> var_4085_equation_0 = const()[name = tensor<string, []>("op_4085_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4085_cast_fp16 = einsum(equation = var_4085_equation_0, values = (var_3985_cast_fp16_9, var_4055_cast_fp16))[name = tensor<string, []>("op_4085_cast_fp16")];
+            tensor<string, []> var_4087_equation_0 = const()[name = tensor<string, []>("op_4087_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4087_cast_fp16 = einsum(equation = var_4087_equation_0, values = (var_3985_cast_fp16_10, var_4056_cast_fp16))[name = tensor<string, []>("op_4087_cast_fp16")];
+            tensor<string, []> var_4089_equation_0 = const()[name = tensor<string, []>("op_4089_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4089_cast_fp16 = einsum(equation = var_4089_equation_0, values = (var_3985_cast_fp16_11, var_4057_cast_fp16))[name = tensor<string, []>("op_4089_cast_fp16")];
+            tensor<string, []> var_4091_equation_0 = const()[name = tensor<string, []>("op_4091_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4091_cast_fp16 = einsum(equation = var_4091_equation_0, values = (var_3985_cast_fp16_12, var_4058_cast_fp16))[name = tensor<string, []>("op_4091_cast_fp16")];
+            tensor<string, []> var_4093_equation_0 = const()[name = tensor<string, []>("op_4093_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4093_cast_fp16 = einsum(equation = var_4093_equation_0, values = (var_3985_cast_fp16_13, var_4059_cast_fp16))[name = tensor<string, []>("op_4093_cast_fp16")];
+            tensor<string, []> var_4095_equation_0 = const()[name = tensor<string, []>("op_4095_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4095_cast_fp16 = einsum(equation = var_4095_equation_0, values = (var_3985_cast_fp16_14, var_4060_cast_fp16))[name = tensor<string, []>("op_4095_cast_fp16")];
+            tensor<string, []> var_4097_equation_0 = const()[name = tensor<string, []>("op_4097_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4097_cast_fp16 = einsum(equation = var_4097_equation_0, values = (var_3985_cast_fp16_15, var_4061_cast_fp16))[name = tensor<string, []>("op_4097_cast_fp16")];
+            tensor<string, []> var_4099_equation_0 = const()[name = tensor<string, []>("op_4099_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4099_cast_fp16 = einsum(equation = var_4099_equation_0, values = (var_3985_cast_fp16_16, var_4062_cast_fp16))[name = tensor<string, []>("op_4099_cast_fp16")];
+            tensor<string, []> var_4101_equation_0 = const()[name = tensor<string, []>("op_4101_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4101_cast_fp16 = einsum(equation = var_4101_equation_0, values = (var_3985_cast_fp16_17, var_4063_cast_fp16))[name = tensor<string, []>("op_4101_cast_fp16")];
+            tensor<string, []> var_4103_equation_0 = const()[name = tensor<string, []>("op_4103_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4103_cast_fp16 = einsum(equation = var_4103_equation_0, values = (var_3985_cast_fp16_18, var_4064_cast_fp16))[name = tensor<string, []>("op_4103_cast_fp16")];
+            tensor<string, []> var_4105_equation_0 = const()[name = tensor<string, []>("op_4105_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4105_cast_fp16 = einsum(equation = var_4105_equation_0, values = (var_3985_cast_fp16_19, var_4065_cast_fp16))[name = tensor<string, []>("op_4105_cast_fp16")];
+            tensor<bool, []> input_145_interleave_0 = const()[name = tensor<string, []>("input_145_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_145_cast_fp16 = concat(axis = var_3890, interleave = input_145_interleave_0, values = (var_4067_cast_fp16, var_4069_cast_fp16, var_4071_cast_fp16, var_4073_cast_fp16, var_4075_cast_fp16, var_4077_cast_fp16, var_4079_cast_fp16, var_4081_cast_fp16, var_4083_cast_fp16, var_4085_cast_fp16, var_4087_cast_fp16, var_4089_cast_fp16, var_4091_cast_fp16, var_4093_cast_fp16, var_4095_cast_fp16, var_4097_cast_fp16, var_4099_cast_fp16, var_4101_cast_fp16, var_4103_cast_fp16, var_4105_cast_fp16))[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<string, []> var_4114_pad_type_0 = const()[name = tensor<string, []>("op_4114_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4114_strides_0 = const()[name = tensor<string, []>("op_4114_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4114_pad_0 = const()[name = tensor<string, []>("op_4114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4114_dilations_0 = const()[name = tensor<string, []>("op_4114_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4114_groups_0 = const()[name = tensor<string, []>("op_4114_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575077312)))];
+            tensor<fp16, [1280]> blocks_14_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578354176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4114_cast_fp16 = conv(bias = blocks_14_attn_out_bias_to_fp16, dilations = var_4114_dilations_0, groups = var_4114_groups_0, pad = var_4114_pad_0, pad_type = var_4114_pad_type_0, strides = var_4114_strides_0, weight = blocks_14_attn_out_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("op_4114_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = var_4114_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> input_147_axes_0 = const()[name = tensor<string, []>("input_147_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_147_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578356800)))];
+            tensor<fp16, [1280]> input_147_beta_0_to_fp16 = const()[name = tensor<string, []>("input_147_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578359424)))];
+            tensor<fp16, []> var_4124_to_fp16 = const()[name = tensor<string, []>("op_4124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_147_cast_fp16 = layer_norm(axes = input_147_axes_0, beta = input_147_beta_0_to_fp16, epsilon = var_4124_to_fp16, gamma = input_147_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<string, []> input_149_pad_type_0 = const()[name = tensor<string, []>("input_149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_149_strides_0 = const()[name = tensor<string, []>("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_149_pad_0 = const()[name = tensor<string, []>("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_149_dilations_0 = const()[name = tensor<string, []>("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_149_groups_0 = const()[name = tensor<string, []>("input_149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_14_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578362048)))];
+            tensor<fp16, [5120]> blocks_14_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591469312)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_149_cast_fp16 = conv(bias = blocks_14_mlp_0_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = blocks_14_mlp_0_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<string, []> input_151_mode_0 = const()[name = tensor<string, []>("input_151_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<string, []> var_4150_pad_type_0 = const()[name = tensor<string, []>("op_4150_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4150_strides_0 = const()[name = tensor<string, []>("op_4150_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4150_pad_0 = const()[name = tensor<string, []>("op_4150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4150_dilations_0 = const()[name = tensor<string, []>("op_4150_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4150_groups_0 = const()[name = tensor<string, []>("op_4150_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_14_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591479616)))];
+            tensor<fp16, [1280]> blocks_14_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604586880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4150_cast_fp16 = conv(bias = blocks_14_mlp_2_bias_to_fp16, dilations = var_4150_dilations_0, groups = var_4150_groups_0, pad = var_4150_pad_0, pad_type = var_4150_pad_type_0, strides = var_4150_strides_0, weight = blocks_14_mlp_2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("op_4150_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = var_4150_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_4159 = const()[name = tensor<string, []>("op_4159"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_153_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_153_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604589504)))];
+            tensor<fp16, [1280]> input_153_beta_0_to_fp16 = const()[name = tensor<string, []>("input_153_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604592128)))];
+            tensor<fp16, []> var_4175_to_fp16 = const()[name = tensor<string, []>("op_4175_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, beta = input_153_beta_0_to_fp16, epsilon = var_4175_to_fp16, gamma = input_153_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<string, []> q_31_pad_type_0 = const()[name = tensor<string, []>("q_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_31_strides_0 = const()[name = tensor<string, []>("q_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_31_pad_0 = const()[name = tensor<string, []>("q_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_31_dilations_0 = const()[name = tensor<string, []>("q_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_31_groups_0 = const()[name = tensor<string, []>("q_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4210_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4210_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604594752)))];
+            tensor<fp16, [1280]> var_4210_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4210_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(607871616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4210_cast_fp16 = conv(bias = var_4210_bias_0_to_fp16, dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = var_4210_weight_0_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4210_cast_fp16")];
+            tensor<string, []> k_31_pad_type_0 = const()[name = tensor<string, []>("k_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_31_strides_0 = const()[name = tensor<string, []>("k_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_31_pad_0 = const()[name = tensor<string, []>("k_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_31_dilations_0 = const()[name = tensor<string, []>("k_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_31_groups_0 = const()[name = tensor<string, []>("k_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(607874240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_31_cast_fp16 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = blocks_15_attn_key_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("k_31_cast_fp16")];
+            tensor<string, []> var_4208_pad_type_0 = const()[name = tensor<string, []>("op_4208_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4208_strides_0 = const()[name = tensor<string, []>("op_4208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4208_pad_0 = const()[name = tensor<string, []>("op_4208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4208_dilations_0 = const()[name = tensor<string, []>("op_4208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4208_groups_0 = const()[name = tensor<string, []>("op_4208_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(611151104)))];
+            tensor<fp16, [1280]> blocks_15_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614427968)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4208_cast_fp16 = conv(bias = blocks_15_attn_value_bias_to_fp16, dilations = var_4208_dilations_0, groups = var_4208_groups_0, pad = var_4208_pad_0, pad_type = var_4208_pad_type_0, strides = var_4208_strides_0, weight = blocks_15_attn_value_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4208_cast_fp16")];
+            tensor<int32, [20]> tile_45 = const()[name = tensor<string, []>("tile_45"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4211_axis_0 = const()[name = tensor<string, []>("op_4211_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_19 = split(axis = var_4211_axis_0, split_sizes = tile_45, x = var_4210_cast_fp16)[name = tensor<string, []>("op_4211_cast_fp16")];
+            tensor<int32, [4]> var_4232_perm_0 = const()[name = tensor<string, []>("op_4232_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_46 = const()[name = tensor<string, []>("tile_46"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4233_axis_0 = const()[name = tensor<string, []>("op_4233_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4232_cast_fp16 = transpose(perm = var_4232_perm_0, x = k_31_cast_fp16)[name = tensor<string, []>("transpose_17")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_19 = split(axis = var_4233_axis_0, split_sizes = tile_46, x = var_4232_cast_fp16)[name = tensor<string, []>("op_4233_cast_fp16")];
+            tensor<int32, [20]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4254_axis_0 = const()[name = tensor<string, []>("op_4254_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_19 = split(axis = var_4254_axis_0, split_sizes = tile_47, x = var_4208_cast_fp16)[name = tensor<string, []>("op_4254_cast_fp16")];
+            tensor<string, []> aw_601_equation_0 = const()[name = tensor<string, []>("aw_601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_601_cast_fp16 = einsum(equation = aw_601_equation_0, values = (var_4233_cast_fp16_0, var_4211_cast_fp16_0))[name = tensor<string, []>("aw_601_cast_fp16")];
+            tensor<string, []> aw_603_equation_0 = const()[name = tensor<string, []>("aw_603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_603_cast_fp16 = einsum(equation = aw_603_equation_0, values = (var_4233_cast_fp16_1, var_4211_cast_fp16_1))[name = tensor<string, []>("aw_603_cast_fp16")];
+            tensor<string, []> aw_605_equation_0 = const()[name = tensor<string, []>("aw_605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_605_cast_fp16 = einsum(equation = aw_605_equation_0, values = (var_4233_cast_fp16_2, var_4211_cast_fp16_2))[name = tensor<string, []>("aw_605_cast_fp16")];
+            tensor<string, []> aw_607_equation_0 = const()[name = tensor<string, []>("aw_607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_607_cast_fp16 = einsum(equation = aw_607_equation_0, values = (var_4233_cast_fp16_3, var_4211_cast_fp16_3))[name = tensor<string, []>("aw_607_cast_fp16")];
+            tensor<string, []> aw_609_equation_0 = const()[name = tensor<string, []>("aw_609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_609_cast_fp16 = einsum(equation = aw_609_equation_0, values = (var_4233_cast_fp16_4, var_4211_cast_fp16_4))[name = tensor<string, []>("aw_609_cast_fp16")];
+            tensor<string, []> aw_611_equation_0 = const()[name = tensor<string, []>("aw_611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_611_cast_fp16 = einsum(equation = aw_611_equation_0, values = (var_4233_cast_fp16_5, var_4211_cast_fp16_5))[name = tensor<string, []>("aw_611_cast_fp16")];
+            tensor<string, []> aw_613_equation_0 = const()[name = tensor<string, []>("aw_613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_613_cast_fp16 = einsum(equation = aw_613_equation_0, values = (var_4233_cast_fp16_6, var_4211_cast_fp16_6))[name = tensor<string, []>("aw_613_cast_fp16")];
+            tensor<string, []> aw_615_equation_0 = const()[name = tensor<string, []>("aw_615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_615_cast_fp16 = einsum(equation = aw_615_equation_0, values = (var_4233_cast_fp16_7, var_4211_cast_fp16_7))[name = tensor<string, []>("aw_615_cast_fp16")];
+            tensor<string, []> aw_617_equation_0 = const()[name = tensor<string, []>("aw_617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_617_cast_fp16 = einsum(equation = aw_617_equation_0, values = (var_4233_cast_fp16_8, var_4211_cast_fp16_8))[name = tensor<string, []>("aw_617_cast_fp16")];
+            tensor<string, []> aw_619_equation_0 = const()[name = tensor<string, []>("aw_619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_619_cast_fp16 = einsum(equation = aw_619_equation_0, values = (var_4233_cast_fp16_9, var_4211_cast_fp16_9))[name = tensor<string, []>("aw_619_cast_fp16")];
+            tensor<string, []> aw_621_equation_0 = const()[name = tensor<string, []>("aw_621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_621_cast_fp16 = einsum(equation = aw_621_equation_0, values = (var_4233_cast_fp16_10, var_4211_cast_fp16_10))[name = tensor<string, []>("aw_621_cast_fp16")];
+            tensor<string, []> aw_623_equation_0 = const()[name = tensor<string, []>("aw_623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_623_cast_fp16 = einsum(equation = aw_623_equation_0, values = (var_4233_cast_fp16_11, var_4211_cast_fp16_11))[name = tensor<string, []>("aw_623_cast_fp16")];
+            tensor<string, []> aw_625_equation_0 = const()[name = tensor<string, []>("aw_625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_625_cast_fp16 = einsum(equation = aw_625_equation_0, values = (var_4233_cast_fp16_12, var_4211_cast_fp16_12))[name = tensor<string, []>("aw_625_cast_fp16")];
+            tensor<string, []> aw_627_equation_0 = const()[name = tensor<string, []>("aw_627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_627_cast_fp16 = einsum(equation = aw_627_equation_0, values = (var_4233_cast_fp16_13, var_4211_cast_fp16_13))[name = tensor<string, []>("aw_627_cast_fp16")];
+            tensor<string, []> aw_629_equation_0 = const()[name = tensor<string, []>("aw_629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_629_cast_fp16 = einsum(equation = aw_629_equation_0, values = (var_4233_cast_fp16_14, var_4211_cast_fp16_14))[name = tensor<string, []>("aw_629_cast_fp16")];
+            tensor<string, []> aw_631_equation_0 = const()[name = tensor<string, []>("aw_631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_631_cast_fp16 = einsum(equation = aw_631_equation_0, values = (var_4233_cast_fp16_15, var_4211_cast_fp16_15))[name = tensor<string, []>("aw_631_cast_fp16")];
+            tensor<string, []> aw_633_equation_0 = const()[name = tensor<string, []>("aw_633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_633_cast_fp16 = einsum(equation = aw_633_equation_0, values = (var_4233_cast_fp16_16, var_4211_cast_fp16_16))[name = tensor<string, []>("aw_633_cast_fp16")];
+            tensor<string, []> aw_635_equation_0 = const()[name = tensor<string, []>("aw_635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_635_cast_fp16 = einsum(equation = aw_635_equation_0, values = (var_4233_cast_fp16_17, var_4211_cast_fp16_17))[name = tensor<string, []>("aw_635_cast_fp16")];
+            tensor<string, []> aw_637_equation_0 = const()[name = tensor<string, []>("aw_637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_637_cast_fp16 = einsum(equation = aw_637_equation_0, values = (var_4233_cast_fp16_18, var_4211_cast_fp16_18))[name = tensor<string, []>("aw_637_cast_fp16")];
+            tensor<string, []> aw_639_equation_0 = const()[name = tensor<string, []>("aw_639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_639_cast_fp16 = einsum(equation = aw_639_equation_0, values = (var_4233_cast_fp16_19, var_4211_cast_fp16_19))[name = tensor<string, []>("aw_639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4315_cast_fp16 = softmax(axis = var_4159, x = aw_601_cast_fp16)[name = tensor<string, []>("op_4315_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4316_cast_fp16 = softmax(axis = var_4159, x = aw_603_cast_fp16)[name = tensor<string, []>("op_4316_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4317_cast_fp16 = softmax(axis = var_4159, x = aw_605_cast_fp16)[name = tensor<string, []>("op_4317_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4318_cast_fp16 = softmax(axis = var_4159, x = aw_607_cast_fp16)[name = tensor<string, []>("op_4318_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4319_cast_fp16 = softmax(axis = var_4159, x = aw_609_cast_fp16)[name = tensor<string, []>("op_4319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4320_cast_fp16 = softmax(axis = var_4159, x = aw_611_cast_fp16)[name = tensor<string, []>("op_4320_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4321_cast_fp16 = softmax(axis = var_4159, x = aw_613_cast_fp16)[name = tensor<string, []>("op_4321_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4322_cast_fp16 = softmax(axis = var_4159, x = aw_615_cast_fp16)[name = tensor<string, []>("op_4322_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4323_cast_fp16 = softmax(axis = var_4159, x = aw_617_cast_fp16)[name = tensor<string, []>("op_4323_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4324_cast_fp16 = softmax(axis = var_4159, x = aw_619_cast_fp16)[name = tensor<string, []>("op_4324_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4325_cast_fp16 = softmax(axis = var_4159, x = aw_621_cast_fp16)[name = tensor<string, []>("op_4325_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4326_cast_fp16 = softmax(axis = var_4159, x = aw_623_cast_fp16)[name = tensor<string, []>("op_4326_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4327_cast_fp16 = softmax(axis = var_4159, x = aw_625_cast_fp16)[name = tensor<string, []>("op_4327_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4328_cast_fp16 = softmax(axis = var_4159, x = aw_627_cast_fp16)[name = tensor<string, []>("op_4328_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4329_cast_fp16 = softmax(axis = var_4159, x = aw_629_cast_fp16)[name = tensor<string, []>("op_4329_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4330_cast_fp16 = softmax(axis = var_4159, x = aw_631_cast_fp16)[name = tensor<string, []>("op_4330_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4331_cast_fp16 = softmax(axis = var_4159, x = aw_633_cast_fp16)[name = tensor<string, []>("op_4331_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4332_cast_fp16 = softmax(axis = var_4159, x = aw_635_cast_fp16)[name = tensor<string, []>("op_4332_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4333_cast_fp16 = softmax(axis = var_4159, x = aw_637_cast_fp16)[name = tensor<string, []>("op_4333_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4334_cast_fp16 = softmax(axis = var_4159, x = aw_639_cast_fp16)[name = tensor<string, []>("op_4334_cast_fp16")];
+            tensor<string, []> var_4336_equation_0 = const()[name = tensor<string, []>("op_4336_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4336_cast_fp16 = einsum(equation = var_4336_equation_0, values = (var_4254_cast_fp16_0, var_4315_cast_fp16))[name = tensor<string, []>("op_4336_cast_fp16")];
+            tensor<string, []> var_4338_equation_0 = const()[name = tensor<string, []>("op_4338_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4338_cast_fp16 = einsum(equation = var_4338_equation_0, values = (var_4254_cast_fp16_1, var_4316_cast_fp16))[name = tensor<string, []>("op_4338_cast_fp16")];
+            tensor<string, []> var_4340_equation_0 = const()[name = tensor<string, []>("op_4340_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4340_cast_fp16 = einsum(equation = var_4340_equation_0, values = (var_4254_cast_fp16_2, var_4317_cast_fp16))[name = tensor<string, []>("op_4340_cast_fp16")];
+            tensor<string, []> var_4342_equation_0 = const()[name = tensor<string, []>("op_4342_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4342_cast_fp16 = einsum(equation = var_4342_equation_0, values = (var_4254_cast_fp16_3, var_4318_cast_fp16))[name = tensor<string, []>("op_4342_cast_fp16")];
+            tensor<string, []> var_4344_equation_0 = const()[name = tensor<string, []>("op_4344_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4344_cast_fp16 = einsum(equation = var_4344_equation_0, values = (var_4254_cast_fp16_4, var_4319_cast_fp16))[name = tensor<string, []>("op_4344_cast_fp16")];
+            tensor<string, []> var_4346_equation_0 = const()[name = tensor<string, []>("op_4346_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4346_cast_fp16 = einsum(equation = var_4346_equation_0, values = (var_4254_cast_fp16_5, var_4320_cast_fp16))[name = tensor<string, []>("op_4346_cast_fp16")];
+            tensor<string, []> var_4348_equation_0 = const()[name = tensor<string, []>("op_4348_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4348_cast_fp16 = einsum(equation = var_4348_equation_0, values = (var_4254_cast_fp16_6, var_4321_cast_fp16))[name = tensor<string, []>("op_4348_cast_fp16")];
+            tensor<string, []> var_4350_equation_0 = const()[name = tensor<string, []>("op_4350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4350_cast_fp16 = einsum(equation = var_4350_equation_0, values = (var_4254_cast_fp16_7, var_4322_cast_fp16))[name = tensor<string, []>("op_4350_cast_fp16")];
+            tensor<string, []> var_4352_equation_0 = const()[name = tensor<string, []>("op_4352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4352_cast_fp16 = einsum(equation = var_4352_equation_0, values = (var_4254_cast_fp16_8, var_4323_cast_fp16))[name = tensor<string, []>("op_4352_cast_fp16")];
+            tensor<string, []> var_4354_equation_0 = const()[name = tensor<string, []>("op_4354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4354_cast_fp16 = einsum(equation = var_4354_equation_0, values = (var_4254_cast_fp16_9, var_4324_cast_fp16))[name = tensor<string, []>("op_4354_cast_fp16")];
+            tensor<string, []> var_4356_equation_0 = const()[name = tensor<string, []>("op_4356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4356_cast_fp16 = einsum(equation = var_4356_equation_0, values = (var_4254_cast_fp16_10, var_4325_cast_fp16))[name = tensor<string, []>("op_4356_cast_fp16")];
+            tensor<string, []> var_4358_equation_0 = const()[name = tensor<string, []>("op_4358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4358_cast_fp16 = einsum(equation = var_4358_equation_0, values = (var_4254_cast_fp16_11, var_4326_cast_fp16))[name = tensor<string, []>("op_4358_cast_fp16")];
+            tensor<string, []> var_4360_equation_0 = const()[name = tensor<string, []>("op_4360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4360_cast_fp16 = einsum(equation = var_4360_equation_0, values = (var_4254_cast_fp16_12, var_4327_cast_fp16))[name = tensor<string, []>("op_4360_cast_fp16")];
+            tensor<string, []> var_4362_equation_0 = const()[name = tensor<string, []>("op_4362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4362_cast_fp16 = einsum(equation = var_4362_equation_0, values = (var_4254_cast_fp16_13, var_4328_cast_fp16))[name = tensor<string, []>("op_4362_cast_fp16")];
+            tensor<string, []> var_4364_equation_0 = const()[name = tensor<string, []>("op_4364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4364_cast_fp16 = einsum(equation = var_4364_equation_0, values = (var_4254_cast_fp16_14, var_4329_cast_fp16))[name = tensor<string, []>("op_4364_cast_fp16")];
+            tensor<string, []> var_4366_equation_0 = const()[name = tensor<string, []>("op_4366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4366_cast_fp16 = einsum(equation = var_4366_equation_0, values = (var_4254_cast_fp16_15, var_4330_cast_fp16))[name = tensor<string, []>("op_4366_cast_fp16")];
+            tensor<string, []> var_4368_equation_0 = const()[name = tensor<string, []>("op_4368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4368_cast_fp16 = einsum(equation = var_4368_equation_0, values = (var_4254_cast_fp16_16, var_4331_cast_fp16))[name = tensor<string, []>("op_4368_cast_fp16")];
+            tensor<string, []> var_4370_equation_0 = const()[name = tensor<string, []>("op_4370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4370_cast_fp16 = einsum(equation = var_4370_equation_0, values = (var_4254_cast_fp16_17, var_4332_cast_fp16))[name = tensor<string, []>("op_4370_cast_fp16")];
+            tensor<string, []> var_4372_equation_0 = const()[name = tensor<string, []>("op_4372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4372_cast_fp16 = einsum(equation = var_4372_equation_0, values = (var_4254_cast_fp16_18, var_4333_cast_fp16))[name = tensor<string, []>("op_4372_cast_fp16")];
+            tensor<string, []> var_4374_equation_0 = const()[name = tensor<string, []>("op_4374_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4374_cast_fp16 = einsum(equation = var_4374_equation_0, values = (var_4254_cast_fp16_19, var_4334_cast_fp16))[name = tensor<string, []>("op_4374_cast_fp16")];
+            tensor<bool, []> input_155_interleave_0 = const()[name = tensor<string, []>("input_155_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_155_cast_fp16 = concat(axis = var_4159, interleave = input_155_interleave_0, values = (var_4336_cast_fp16, var_4338_cast_fp16, var_4340_cast_fp16, var_4342_cast_fp16, var_4344_cast_fp16, var_4346_cast_fp16, var_4348_cast_fp16, var_4350_cast_fp16, var_4352_cast_fp16, var_4354_cast_fp16, var_4356_cast_fp16, var_4358_cast_fp16, var_4360_cast_fp16, var_4362_cast_fp16, var_4364_cast_fp16, var_4366_cast_fp16, var_4368_cast_fp16, var_4370_cast_fp16, var_4372_cast_fp16, var_4374_cast_fp16))[name = tensor<string, []>("input_155_cast_fp16")];
+            tensor<string, []> var_4383_pad_type_0 = const()[name = tensor<string, []>("op_4383_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4383_strides_0 = const()[name = tensor<string, []>("op_4383_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4383_pad_0 = const()[name = tensor<string, []>("op_4383_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4383_dilations_0 = const()[name = tensor<string, []>("op_4383_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4383_groups_0 = const()[name = tensor<string, []>("op_4383_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614430592)))];
+            tensor<fp16, [1280]> blocks_15_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617707456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4383_cast_fp16 = conv(bias = blocks_15_attn_out_bias_to_fp16, dilations = var_4383_dilations_0, groups = var_4383_groups_0, pad = var_4383_pad_0, pad_type = var_4383_pad_type_0, strides = var_4383_strides_0, weight = blocks_15_attn_out_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("op_4383_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = var_4383_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> input_157_axes_0 = const()[name = tensor<string, []>("input_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_157_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_157_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617710080)))];
+            tensor<fp16, [1280]> input_157_beta_0_to_fp16 = const()[name = tensor<string, []>("input_157_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617712704)))];
+            tensor<fp16, []> var_4393_to_fp16 = const()[name = tensor<string, []>("op_4393_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_157_cast_fp16 = layer_norm(axes = input_157_axes_0, beta = input_157_beta_0_to_fp16, epsilon = var_4393_to_fp16, gamma = input_157_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
+            tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_15_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617715328)))];
+            tensor<fp16, [5120]> blocks_15_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(630822592)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_159_cast_fp16 = conv(bias = blocks_15_mlp_0_bias_to_fp16, dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = blocks_15_mlp_0_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
+            tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
+            tensor<string, []> var_4419_pad_type_0 = const()[name = tensor<string, []>("op_4419_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4419_strides_0 = const()[name = tensor<string, []>("op_4419_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4419_pad_0 = const()[name = tensor<string, []>("op_4419_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4419_dilations_0 = const()[name = tensor<string, []>("op_4419_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4419_groups_0 = const()[name = tensor<string, []>("op_4419_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_15_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(630832896)))];
+            tensor<fp16, [1280]> blocks_15_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643940160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4419_cast_fp16 = conv(bias = blocks_15_mlp_2_bias_to_fp16, dilations = var_4419_dilations_0, groups = var_4419_groups_0, pad = var_4419_pad_0, pad_type = var_4419_pad_type_0, strides = var_4419_strides_0, weight = blocks_15_mlp_2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("op_4419_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_4419_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, []> var_4428 = const()[name = tensor<string, []>("op_4428"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643942784)))];
+            tensor<fp16, [1280]> input_163_beta_0_to_fp16 = const()[name = tensor<string, []>("input_163_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643945408)))];
+            tensor<fp16, []> var_4444_to_fp16 = const()[name = tensor<string, []>("op_4444_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_4444_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<string, []> q_33_pad_type_0 = const()[name = tensor<string, []>("q_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_33_strides_0 = const()[name = tensor<string, []>("q_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_33_pad_0 = const()[name = tensor<string, []>("q_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_33_dilations_0 = const()[name = tensor<string, []>("q_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_33_groups_0 = const()[name = tensor<string, []>("q_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4479_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4479_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643948032)))];
+            tensor<fp16, [1280]> var_4479_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4479_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647224896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4479_cast_fp16 = conv(bias = var_4479_bias_0_to_fp16, dilations = q_33_dilations_0, groups = q_33_groups_0, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = q_33_strides_0, weight = var_4479_weight_0_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4479_cast_fp16")];
+            tensor<string, []> k_33_pad_type_0 = const()[name = tensor<string, []>("k_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_33_strides_0 = const()[name = tensor<string, []>("k_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_33_pad_0 = const()[name = tensor<string, []>("k_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_33_dilations_0 = const()[name = tensor<string, []>("k_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_33_groups_0 = const()[name = tensor<string, []>("k_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647227520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_33_cast_fp16 = conv(dilations = k_33_dilations_0, groups = k_33_groups_0, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = k_33_strides_0, weight = blocks_16_attn_key_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
+            tensor<string, []> var_4477_pad_type_0 = const()[name = tensor<string, []>("op_4477_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4477_strides_0 = const()[name = tensor<string, []>("op_4477_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4477_pad_0 = const()[name = tensor<string, []>("op_4477_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4477_dilations_0 = const()[name = tensor<string, []>("op_4477_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4477_groups_0 = const()[name = tensor<string, []>("op_4477_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(650504384)))];
+            tensor<fp16, [1280]> blocks_16_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(653781248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4477_cast_fp16 = conv(bias = blocks_16_attn_value_bias_to_fp16, dilations = var_4477_dilations_0, groups = var_4477_groups_0, pad = var_4477_pad_0, pad_type = var_4477_pad_type_0, strides = var_4477_strides_0, weight = blocks_16_attn_value_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4477_cast_fp16")];
+            tensor<int32, [20]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4480_axis_0 = const()[name = tensor<string, []>("op_4480_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_19 = split(axis = var_4480_axis_0, split_sizes = tile_48, x = var_4479_cast_fp16)[name = tensor<string, []>("op_4480_cast_fp16")];
+            tensor<int32, [4]> var_4501_perm_0 = const()[name = tensor<string, []>("op_4501_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4502_axis_0 = const()[name = tensor<string, []>("op_4502_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4501_cast_fp16 = transpose(perm = var_4501_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_16")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_19 = split(axis = var_4502_axis_0, split_sizes = tile_49, x = var_4501_cast_fp16)[name = tensor<string, []>("op_4502_cast_fp16")];
+            tensor<int32, [20]> tile_50 = const()[name = tensor<string, []>("tile_50"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4523_axis_0 = const()[name = tensor<string, []>("op_4523_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_19 = split(axis = var_4523_axis_0, split_sizes = tile_50, x = var_4477_cast_fp16)[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<string, []> aw_641_equation_0 = const()[name = tensor<string, []>("aw_641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_641_cast_fp16 = einsum(equation = aw_641_equation_0, values = (var_4502_cast_fp16_0, var_4480_cast_fp16_0))[name = tensor<string, []>("aw_641_cast_fp16")];
+            tensor<string, []> aw_643_equation_0 = const()[name = tensor<string, []>("aw_643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_643_cast_fp16 = einsum(equation = aw_643_equation_0, values = (var_4502_cast_fp16_1, var_4480_cast_fp16_1))[name = tensor<string, []>("aw_643_cast_fp16")];
+            tensor<string, []> aw_645_equation_0 = const()[name = tensor<string, []>("aw_645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_645_cast_fp16 = einsum(equation = aw_645_equation_0, values = (var_4502_cast_fp16_2, var_4480_cast_fp16_2))[name = tensor<string, []>("aw_645_cast_fp16")];
+            tensor<string, []> aw_647_equation_0 = const()[name = tensor<string, []>("aw_647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_647_cast_fp16 = einsum(equation = aw_647_equation_0, values = (var_4502_cast_fp16_3, var_4480_cast_fp16_3))[name = tensor<string, []>("aw_647_cast_fp16")];
+            tensor<string, []> aw_649_equation_0 = const()[name = tensor<string, []>("aw_649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_649_cast_fp16 = einsum(equation = aw_649_equation_0, values = (var_4502_cast_fp16_4, var_4480_cast_fp16_4))[name = tensor<string, []>("aw_649_cast_fp16")];
+            tensor<string, []> aw_651_equation_0 = const()[name = tensor<string, []>("aw_651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_651_cast_fp16 = einsum(equation = aw_651_equation_0, values = (var_4502_cast_fp16_5, var_4480_cast_fp16_5))[name = tensor<string, []>("aw_651_cast_fp16")];
+            tensor<string, []> aw_653_equation_0 = const()[name = tensor<string, []>("aw_653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_653_cast_fp16 = einsum(equation = aw_653_equation_0, values = (var_4502_cast_fp16_6, var_4480_cast_fp16_6))[name = tensor<string, []>("aw_653_cast_fp16")];
+            tensor<string, []> aw_655_equation_0 = const()[name = tensor<string, []>("aw_655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_655_cast_fp16 = einsum(equation = aw_655_equation_0, values = (var_4502_cast_fp16_7, var_4480_cast_fp16_7))[name = tensor<string, []>("aw_655_cast_fp16")];
+            tensor<string, []> aw_657_equation_0 = const()[name = tensor<string, []>("aw_657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_657_cast_fp16 = einsum(equation = aw_657_equation_0, values = (var_4502_cast_fp16_8, var_4480_cast_fp16_8))[name = tensor<string, []>("aw_657_cast_fp16")];
+            tensor<string, []> aw_659_equation_0 = const()[name = tensor<string, []>("aw_659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_659_cast_fp16 = einsum(equation = aw_659_equation_0, values = (var_4502_cast_fp16_9, var_4480_cast_fp16_9))[name = tensor<string, []>("aw_659_cast_fp16")];
+            tensor<string, []> aw_661_equation_0 = const()[name = tensor<string, []>("aw_661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_661_cast_fp16 = einsum(equation = aw_661_equation_0, values = (var_4502_cast_fp16_10, var_4480_cast_fp16_10))[name = tensor<string, []>("aw_661_cast_fp16")];
+            tensor<string, []> aw_663_equation_0 = const()[name = tensor<string, []>("aw_663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_663_cast_fp16 = einsum(equation = aw_663_equation_0, values = (var_4502_cast_fp16_11, var_4480_cast_fp16_11))[name = tensor<string, []>("aw_663_cast_fp16")];
+            tensor<string, []> aw_665_equation_0 = const()[name = tensor<string, []>("aw_665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_665_cast_fp16 = einsum(equation = aw_665_equation_0, values = (var_4502_cast_fp16_12, var_4480_cast_fp16_12))[name = tensor<string, []>("aw_665_cast_fp16")];
+            tensor<string, []> aw_667_equation_0 = const()[name = tensor<string, []>("aw_667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_667_cast_fp16 = einsum(equation = aw_667_equation_0, values = (var_4502_cast_fp16_13, var_4480_cast_fp16_13))[name = tensor<string, []>("aw_667_cast_fp16")];
+            tensor<string, []> aw_669_equation_0 = const()[name = tensor<string, []>("aw_669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_669_cast_fp16 = einsum(equation = aw_669_equation_0, values = (var_4502_cast_fp16_14, var_4480_cast_fp16_14))[name = tensor<string, []>("aw_669_cast_fp16")];
+            tensor<string, []> aw_671_equation_0 = const()[name = tensor<string, []>("aw_671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_671_cast_fp16 = einsum(equation = aw_671_equation_0, values = (var_4502_cast_fp16_15, var_4480_cast_fp16_15))[name = tensor<string, []>("aw_671_cast_fp16")];
+            tensor<string, []> aw_673_equation_0 = const()[name = tensor<string, []>("aw_673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_673_cast_fp16 = einsum(equation = aw_673_equation_0, values = (var_4502_cast_fp16_16, var_4480_cast_fp16_16))[name = tensor<string, []>("aw_673_cast_fp16")];
+            tensor<string, []> aw_675_equation_0 = const()[name = tensor<string, []>("aw_675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_675_cast_fp16 = einsum(equation = aw_675_equation_0, values = (var_4502_cast_fp16_17, var_4480_cast_fp16_17))[name = tensor<string, []>("aw_675_cast_fp16")];
+            tensor<string, []> aw_677_equation_0 = const()[name = tensor<string, []>("aw_677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_677_cast_fp16 = einsum(equation = aw_677_equation_0, values = (var_4502_cast_fp16_18, var_4480_cast_fp16_18))[name = tensor<string, []>("aw_677_cast_fp16")];
+            tensor<string, []> aw_679_equation_0 = const()[name = tensor<string, []>("aw_679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_679_cast_fp16 = einsum(equation = aw_679_equation_0, values = (var_4502_cast_fp16_19, var_4480_cast_fp16_19))[name = tensor<string, []>("aw_679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4584_cast_fp16 = softmax(axis = var_4428, x = aw_641_cast_fp16)[name = tensor<string, []>("op_4584_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4585_cast_fp16 = softmax(axis = var_4428, x = aw_643_cast_fp16)[name = tensor<string, []>("op_4585_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4586_cast_fp16 = softmax(axis = var_4428, x = aw_645_cast_fp16)[name = tensor<string, []>("op_4586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4587_cast_fp16 = softmax(axis = var_4428, x = aw_647_cast_fp16)[name = tensor<string, []>("op_4587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4588_cast_fp16 = softmax(axis = var_4428, x = aw_649_cast_fp16)[name = tensor<string, []>("op_4588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4589_cast_fp16 = softmax(axis = var_4428, x = aw_651_cast_fp16)[name = tensor<string, []>("op_4589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4590_cast_fp16 = softmax(axis = var_4428, x = aw_653_cast_fp16)[name = tensor<string, []>("op_4590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4591_cast_fp16 = softmax(axis = var_4428, x = aw_655_cast_fp16)[name = tensor<string, []>("op_4591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4592_cast_fp16 = softmax(axis = var_4428, x = aw_657_cast_fp16)[name = tensor<string, []>("op_4592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4593_cast_fp16 = softmax(axis = var_4428, x = aw_659_cast_fp16)[name = tensor<string, []>("op_4593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4594_cast_fp16 = softmax(axis = var_4428, x = aw_661_cast_fp16)[name = tensor<string, []>("op_4594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4595_cast_fp16 = softmax(axis = var_4428, x = aw_663_cast_fp16)[name = tensor<string, []>("op_4595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4596_cast_fp16 = softmax(axis = var_4428, x = aw_665_cast_fp16)[name = tensor<string, []>("op_4596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4597_cast_fp16 = softmax(axis = var_4428, x = aw_667_cast_fp16)[name = tensor<string, []>("op_4597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4598_cast_fp16 = softmax(axis = var_4428, x = aw_669_cast_fp16)[name = tensor<string, []>("op_4598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4599_cast_fp16 = softmax(axis = var_4428, x = aw_671_cast_fp16)[name = tensor<string, []>("op_4599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4600_cast_fp16 = softmax(axis = var_4428, x = aw_673_cast_fp16)[name = tensor<string, []>("op_4600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4601_cast_fp16 = softmax(axis = var_4428, x = aw_675_cast_fp16)[name = tensor<string, []>("op_4601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4602_cast_fp16 = softmax(axis = var_4428, x = aw_677_cast_fp16)[name = tensor<string, []>("op_4602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4603_cast_fp16 = softmax(axis = var_4428, x = aw_679_cast_fp16)[name = tensor<string, []>("op_4603_cast_fp16")];
+            tensor<string, []> var_4605_equation_0 = const()[name = tensor<string, []>("op_4605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4605_cast_fp16 = einsum(equation = var_4605_equation_0, values = (var_4523_cast_fp16_0, var_4584_cast_fp16))[name = tensor<string, []>("op_4605_cast_fp16")];
+            tensor<string, []> var_4607_equation_0 = const()[name = tensor<string, []>("op_4607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4523_cast_fp16_1, var_4585_cast_fp16))[name = tensor<string, []>("op_4607_cast_fp16")];
+            tensor<string, []> var_4609_equation_0 = const()[name = tensor<string, []>("op_4609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4609_cast_fp16 = einsum(equation = var_4609_equation_0, values = (var_4523_cast_fp16_2, var_4586_cast_fp16))[name = tensor<string, []>("op_4609_cast_fp16")];
+            tensor<string, []> var_4611_equation_0 = const()[name = tensor<string, []>("op_4611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4523_cast_fp16_3, var_4587_cast_fp16))[name = tensor<string, []>("op_4611_cast_fp16")];
+            tensor<string, []> var_4613_equation_0 = const()[name = tensor<string, []>("op_4613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4613_cast_fp16 = einsum(equation = var_4613_equation_0, values = (var_4523_cast_fp16_4, var_4588_cast_fp16))[name = tensor<string, []>("op_4613_cast_fp16")];
+            tensor<string, []> var_4615_equation_0 = const()[name = tensor<string, []>("op_4615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4523_cast_fp16_5, var_4589_cast_fp16))[name = tensor<string, []>("op_4615_cast_fp16")];
+            tensor<string, []> var_4617_equation_0 = const()[name = tensor<string, []>("op_4617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4617_cast_fp16 = einsum(equation = var_4617_equation_0, values = (var_4523_cast_fp16_6, var_4590_cast_fp16))[name = tensor<string, []>("op_4617_cast_fp16")];
+            tensor<string, []> var_4619_equation_0 = const()[name = tensor<string, []>("op_4619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4523_cast_fp16_7, var_4591_cast_fp16))[name = tensor<string, []>("op_4619_cast_fp16")];
+            tensor<string, []> var_4621_equation_0 = const()[name = tensor<string, []>("op_4621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4621_cast_fp16 = einsum(equation = var_4621_equation_0, values = (var_4523_cast_fp16_8, var_4592_cast_fp16))[name = tensor<string, []>("op_4621_cast_fp16")];
+            tensor<string, []> var_4623_equation_0 = const()[name = tensor<string, []>("op_4623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4523_cast_fp16_9, var_4593_cast_fp16))[name = tensor<string, []>("op_4623_cast_fp16")];
+            tensor<string, []> var_4625_equation_0 = const()[name = tensor<string, []>("op_4625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4625_cast_fp16 = einsum(equation = var_4625_equation_0, values = (var_4523_cast_fp16_10, var_4594_cast_fp16))[name = tensor<string, []>("op_4625_cast_fp16")];
+            tensor<string, []> var_4627_equation_0 = const()[name = tensor<string, []>("op_4627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4523_cast_fp16_11, var_4595_cast_fp16))[name = tensor<string, []>("op_4627_cast_fp16")];
+            tensor<string, []> var_4629_equation_0 = const()[name = tensor<string, []>("op_4629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4629_cast_fp16 = einsum(equation = var_4629_equation_0, values = (var_4523_cast_fp16_12, var_4596_cast_fp16))[name = tensor<string, []>("op_4629_cast_fp16")];
+            tensor<string, []> var_4631_equation_0 = const()[name = tensor<string, []>("op_4631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4523_cast_fp16_13, var_4597_cast_fp16))[name = tensor<string, []>("op_4631_cast_fp16")];
+            tensor<string, []> var_4633_equation_0 = const()[name = tensor<string, []>("op_4633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4633_cast_fp16 = einsum(equation = var_4633_equation_0, values = (var_4523_cast_fp16_14, var_4598_cast_fp16))[name = tensor<string, []>("op_4633_cast_fp16")];
+            tensor<string, []> var_4635_equation_0 = const()[name = tensor<string, []>("op_4635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4523_cast_fp16_15, var_4599_cast_fp16))[name = tensor<string, []>("op_4635_cast_fp16")];
+            tensor<string, []> var_4637_equation_0 = const()[name = tensor<string, []>("op_4637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4637_cast_fp16 = einsum(equation = var_4637_equation_0, values = (var_4523_cast_fp16_16, var_4600_cast_fp16))[name = tensor<string, []>("op_4637_cast_fp16")];
+            tensor<string, []> var_4639_equation_0 = const()[name = tensor<string, []>("op_4639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4523_cast_fp16_17, var_4601_cast_fp16))[name = tensor<string, []>("op_4639_cast_fp16")];
+            tensor<string, []> var_4641_equation_0 = const()[name = tensor<string, []>("op_4641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4641_cast_fp16 = einsum(equation = var_4641_equation_0, values = (var_4523_cast_fp16_18, var_4602_cast_fp16))[name = tensor<string, []>("op_4641_cast_fp16")];
+            tensor<string, []> var_4643_equation_0 = const()[name = tensor<string, []>("op_4643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4523_cast_fp16_19, var_4603_cast_fp16))[name = tensor<string, []>("op_4643_cast_fp16")];
+            tensor<bool, []> input_165_interleave_0 = const()[name = tensor<string, []>("input_165_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_165_cast_fp16 = concat(axis = var_4428, interleave = input_165_interleave_0, values = (var_4605_cast_fp16, var_4607_cast_fp16, var_4609_cast_fp16, var_4611_cast_fp16, var_4613_cast_fp16, var_4615_cast_fp16, var_4617_cast_fp16, var_4619_cast_fp16, var_4621_cast_fp16, var_4623_cast_fp16, var_4625_cast_fp16, var_4627_cast_fp16, var_4629_cast_fp16, var_4631_cast_fp16, var_4633_cast_fp16, var_4635_cast_fp16, var_4637_cast_fp16, var_4639_cast_fp16, var_4641_cast_fp16, var_4643_cast_fp16))[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<string, []> var_4652_pad_type_0 = const()[name = tensor<string, []>("op_4652_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4652_strides_0 = const()[name = tensor<string, []>("op_4652_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4652_pad_0 = const()[name = tensor<string, []>("op_4652_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4652_dilations_0 = const()[name = tensor<string, []>("op_4652_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4652_groups_0 = const()[name = tensor<string, []>("op_4652_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(653783872)))];
+            tensor<fp16, [1280]> blocks_16_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657060736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4652_cast_fp16 = conv(bias = blocks_16_attn_out_bias_to_fp16, dilations = var_4652_dilations_0, groups = var_4652_groups_0, pad = var_4652_pad_0, pad_type = var_4652_pad_type_0, strides = var_4652_strides_0, weight = blocks_16_attn_out_weight_to_fp16, x = input_165_cast_fp16)[name = tensor<string, []>("op_4652_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = var_4652_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, [1]> input_167_axes_0 = const()[name = tensor<string, []>("input_167_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_167_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_167_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657063360)))];
+            tensor<fp16, [1280]> input_167_beta_0_to_fp16 = const()[name = tensor<string, []>("input_167_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657065984)))];
+            tensor<fp16, []> var_4662_to_fp16 = const()[name = tensor<string, []>("op_4662_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_167_cast_fp16 = layer_norm(axes = input_167_axes_0, beta = input_167_beta_0_to_fp16, epsilon = var_4662_to_fp16, gamma = input_167_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<string, []> input_169_pad_type_0 = const()[name = tensor<string, []>("input_169_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_169_strides_0 = const()[name = tensor<string, []>("input_169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_169_pad_0 = const()[name = tensor<string, []>("input_169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_169_dilations_0 = const()[name = tensor<string, []>("input_169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_169_groups_0 = const()[name = tensor<string, []>("input_169_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_16_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657068608)))];
+            tensor<fp16, [5120]> blocks_16_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670175872)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_169_cast_fp16 = conv(bias = blocks_16_mlp_0_bias_to_fp16, dilations = input_169_dilations_0, groups = input_169_groups_0, pad = input_169_pad_0, pad_type = input_169_pad_type_0, strides = input_169_strides_0, weight = blocks_16_mlp_0_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<string, []> input_171_mode_0 = const()[name = tensor<string, []>("input_171_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_171_cast_fp16 = gelu(mode = input_171_mode_0, x = input_169_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<string, []> var_4688_pad_type_0 = const()[name = tensor<string, []>("op_4688_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4688_strides_0 = const()[name = tensor<string, []>("op_4688_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4688_pad_0 = const()[name = tensor<string, []>("op_4688_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4688_dilations_0 = const()[name = tensor<string, []>("op_4688_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4688_groups_0 = const()[name = tensor<string, []>("op_4688_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_16_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670186176)))];
+            tensor<fp16, [1280]> blocks_16_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683293440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4688_cast_fp16 = conv(bias = blocks_16_mlp_2_bias_to_fp16, dilations = var_4688_dilations_0, groups = var_4688_groups_0, pad = var_4688_pad_0, pad_type = var_4688_pad_type_0, strides = var_4688_strides_0, weight = blocks_16_mlp_2_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("op_4688_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = var_4688_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, []> var_4697 = const()[name = tensor<string, []>("op_4697"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_173_axes_0 = const()[name = tensor<string, []>("input_173_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_173_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_173_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683296064)))];
+            tensor<fp16, [1280]> input_173_beta_0_to_fp16 = const()[name = tensor<string, []>("input_173_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683298688)))];
+            tensor<fp16, []> var_4713_to_fp16 = const()[name = tensor<string, []>("op_4713_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_173_cast_fp16 = layer_norm(axes = input_173_axes_0, beta = input_173_beta_0_to_fp16, epsilon = var_4713_to_fp16, gamma = input_173_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<string, []> q_35_pad_type_0 = const()[name = tensor<string, []>("q_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_35_strides_0 = const()[name = tensor<string, []>("q_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_35_pad_0 = const()[name = tensor<string, []>("q_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_35_dilations_0 = const()[name = tensor<string, []>("q_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_35_groups_0 = const()[name = tensor<string, []>("q_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4748_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4748_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683301312)))];
+            tensor<fp16, [1280]> var_4748_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4748_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686578176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4748_cast_fp16 = conv(bias = var_4748_bias_0_to_fp16, dilations = q_35_dilations_0, groups = q_35_groups_0, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = q_35_strides_0, weight = var_4748_weight_0_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<string, []> k_35_pad_type_0 = const()[name = tensor<string, []>("k_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_35_strides_0 = const()[name = tensor<string, []>("k_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_35_pad_0 = const()[name = tensor<string, []>("k_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_35_dilations_0 = const()[name = tensor<string, []>("k_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_35_groups_0 = const()[name = tensor<string, []>("k_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686580800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_35_cast_fp16 = conv(dilations = k_35_dilations_0, groups = k_35_groups_0, pad = k_35_pad_0, pad_type = k_35_pad_type_0, strides = k_35_strides_0, weight = blocks_17_attn_key_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("k_35_cast_fp16")];
+            tensor<string, []> var_4746_pad_type_0 = const()[name = tensor<string, []>("op_4746_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4746_strides_0 = const()[name = tensor<string, []>("op_4746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4746_pad_0 = const()[name = tensor<string, []>("op_4746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4746_dilations_0 = const()[name = tensor<string, []>("op_4746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4746_groups_0 = const()[name = tensor<string, []>("op_4746_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(689857664)))];
+            tensor<fp16, [1280]> blocks_17_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693134528)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4746_cast_fp16 = conv(bias = blocks_17_attn_value_bias_to_fp16, dilations = var_4746_dilations_0, groups = var_4746_groups_0, pad = var_4746_pad_0, pad_type = var_4746_pad_type_0, strides = var_4746_strides_0, weight = blocks_17_attn_value_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4746_cast_fp16")];
+            tensor<int32, [20]> tile_51 = const()[name = tensor<string, []>("tile_51"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4749_axis_0 = const()[name = tensor<string, []>("op_4749_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_19 = split(axis = var_4749_axis_0, split_sizes = tile_51, x = var_4748_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<int32, [4]> var_4770_perm_0 = const()[name = tensor<string, []>("op_4770_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4771_axis_0 = const()[name = tensor<string, []>("op_4771_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4770_cast_fp16 = transpose(perm = var_4770_perm_0, x = k_35_cast_fp16)[name = tensor<string, []>("transpose_15")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_19 = split(axis = var_4771_axis_0, split_sizes = tile_52, x = var_4770_cast_fp16)[name = tensor<string, []>("op_4771_cast_fp16")];
+            tensor<int32, [20]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4792_axis_0 = const()[name = tensor<string, []>("op_4792_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_19 = split(axis = var_4792_axis_0, split_sizes = tile_53, x = var_4746_cast_fp16)[name = tensor<string, []>("op_4792_cast_fp16")];
+            tensor<string, []> aw_681_equation_0 = const()[name = tensor<string, []>("aw_681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_681_cast_fp16 = einsum(equation = aw_681_equation_0, values = (var_4771_cast_fp16_0, var_4749_cast_fp16_0))[name = tensor<string, []>("aw_681_cast_fp16")];
+            tensor<string, []> aw_683_equation_0 = const()[name = tensor<string, []>("aw_683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_683_cast_fp16 = einsum(equation = aw_683_equation_0, values = (var_4771_cast_fp16_1, var_4749_cast_fp16_1))[name = tensor<string, []>("aw_683_cast_fp16")];
+            tensor<string, []> aw_685_equation_0 = const()[name = tensor<string, []>("aw_685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_685_cast_fp16 = einsum(equation = aw_685_equation_0, values = (var_4771_cast_fp16_2, var_4749_cast_fp16_2))[name = tensor<string, []>("aw_685_cast_fp16")];
+            tensor<string, []> aw_687_equation_0 = const()[name = tensor<string, []>("aw_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_687_cast_fp16 = einsum(equation = aw_687_equation_0, values = (var_4771_cast_fp16_3, var_4749_cast_fp16_3))[name = tensor<string, []>("aw_687_cast_fp16")];
+            tensor<string, []> aw_689_equation_0 = const()[name = tensor<string, []>("aw_689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_689_cast_fp16 = einsum(equation = aw_689_equation_0, values = (var_4771_cast_fp16_4, var_4749_cast_fp16_4))[name = tensor<string, []>("aw_689_cast_fp16")];
+            tensor<string, []> aw_691_equation_0 = const()[name = tensor<string, []>("aw_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_691_cast_fp16 = einsum(equation = aw_691_equation_0, values = (var_4771_cast_fp16_5, var_4749_cast_fp16_5))[name = tensor<string, []>("aw_691_cast_fp16")];
+            tensor<string, []> aw_693_equation_0 = const()[name = tensor<string, []>("aw_693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_693_cast_fp16 = einsum(equation = aw_693_equation_0, values = (var_4771_cast_fp16_6, var_4749_cast_fp16_6))[name = tensor<string, []>("aw_693_cast_fp16")];
+            tensor<string, []> aw_695_equation_0 = const()[name = tensor<string, []>("aw_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_695_cast_fp16 = einsum(equation = aw_695_equation_0, values = (var_4771_cast_fp16_7, var_4749_cast_fp16_7))[name = tensor<string, []>("aw_695_cast_fp16")];
+            tensor<string, []> aw_697_equation_0 = const()[name = tensor<string, []>("aw_697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_697_cast_fp16 = einsum(equation = aw_697_equation_0, values = (var_4771_cast_fp16_8, var_4749_cast_fp16_8))[name = tensor<string, []>("aw_697_cast_fp16")];
+            tensor<string, []> aw_699_equation_0 = const()[name = tensor<string, []>("aw_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_699_cast_fp16 = einsum(equation = aw_699_equation_0, values = (var_4771_cast_fp16_9, var_4749_cast_fp16_9))[name = tensor<string, []>("aw_699_cast_fp16")];
+            tensor<string, []> aw_701_equation_0 = const()[name = tensor<string, []>("aw_701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_701_cast_fp16 = einsum(equation = aw_701_equation_0, values = (var_4771_cast_fp16_10, var_4749_cast_fp16_10))[name = tensor<string, []>("aw_701_cast_fp16")];
+            tensor<string, []> aw_703_equation_0 = const()[name = tensor<string, []>("aw_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_703_cast_fp16 = einsum(equation = aw_703_equation_0, values = (var_4771_cast_fp16_11, var_4749_cast_fp16_11))[name = tensor<string, []>("aw_703_cast_fp16")];
+            tensor<string, []> aw_705_equation_0 = const()[name = tensor<string, []>("aw_705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_705_cast_fp16 = einsum(equation = aw_705_equation_0, values = (var_4771_cast_fp16_12, var_4749_cast_fp16_12))[name = tensor<string, []>("aw_705_cast_fp16")];
+            tensor<string, []> aw_707_equation_0 = const()[name = tensor<string, []>("aw_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_707_cast_fp16 = einsum(equation = aw_707_equation_0, values = (var_4771_cast_fp16_13, var_4749_cast_fp16_13))[name = tensor<string, []>("aw_707_cast_fp16")];
+            tensor<string, []> aw_709_equation_0 = const()[name = tensor<string, []>("aw_709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_709_cast_fp16 = einsum(equation = aw_709_equation_0, values = (var_4771_cast_fp16_14, var_4749_cast_fp16_14))[name = tensor<string, []>("aw_709_cast_fp16")];
+            tensor<string, []> aw_711_equation_0 = const()[name = tensor<string, []>("aw_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_711_cast_fp16 = einsum(equation = aw_711_equation_0, values = (var_4771_cast_fp16_15, var_4749_cast_fp16_15))[name = tensor<string, []>("aw_711_cast_fp16")];
+            tensor<string, []> aw_713_equation_0 = const()[name = tensor<string, []>("aw_713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_713_cast_fp16 = einsum(equation = aw_713_equation_0, values = (var_4771_cast_fp16_16, var_4749_cast_fp16_16))[name = tensor<string, []>("aw_713_cast_fp16")];
+            tensor<string, []> aw_715_equation_0 = const()[name = tensor<string, []>("aw_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_715_cast_fp16 = einsum(equation = aw_715_equation_0, values = (var_4771_cast_fp16_17, var_4749_cast_fp16_17))[name = tensor<string, []>("aw_715_cast_fp16")];
+            tensor<string, []> aw_717_equation_0 = const()[name = tensor<string, []>("aw_717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_717_cast_fp16 = einsum(equation = aw_717_equation_0, values = (var_4771_cast_fp16_18, var_4749_cast_fp16_18))[name = tensor<string, []>("aw_717_cast_fp16")];
+            tensor<string, []> aw_719_equation_0 = const()[name = tensor<string, []>("aw_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_719_cast_fp16 = einsum(equation = aw_719_equation_0, values = (var_4771_cast_fp16_19, var_4749_cast_fp16_19))[name = tensor<string, []>("aw_719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4853_cast_fp16 = softmax(axis = var_4697, x = aw_681_cast_fp16)[name = tensor<string, []>("op_4853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4854_cast_fp16 = softmax(axis = var_4697, x = aw_683_cast_fp16)[name = tensor<string, []>("op_4854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4855_cast_fp16 = softmax(axis = var_4697, x = aw_685_cast_fp16)[name = tensor<string, []>("op_4855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4856_cast_fp16 = softmax(axis = var_4697, x = aw_687_cast_fp16)[name = tensor<string, []>("op_4856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4857_cast_fp16 = softmax(axis = var_4697, x = aw_689_cast_fp16)[name = tensor<string, []>("op_4857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4858_cast_fp16 = softmax(axis = var_4697, x = aw_691_cast_fp16)[name = tensor<string, []>("op_4858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4859_cast_fp16 = softmax(axis = var_4697, x = aw_693_cast_fp16)[name = tensor<string, []>("op_4859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4860_cast_fp16 = softmax(axis = var_4697, x = aw_695_cast_fp16)[name = tensor<string, []>("op_4860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4861_cast_fp16 = softmax(axis = var_4697, x = aw_697_cast_fp16)[name = tensor<string, []>("op_4861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4862_cast_fp16 = softmax(axis = var_4697, x = aw_699_cast_fp16)[name = tensor<string, []>("op_4862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4863_cast_fp16 = softmax(axis = var_4697, x = aw_701_cast_fp16)[name = tensor<string, []>("op_4863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4864_cast_fp16 = softmax(axis = var_4697, x = aw_703_cast_fp16)[name = tensor<string, []>("op_4864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4865_cast_fp16 = softmax(axis = var_4697, x = aw_705_cast_fp16)[name = tensor<string, []>("op_4865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4866_cast_fp16 = softmax(axis = var_4697, x = aw_707_cast_fp16)[name = tensor<string, []>("op_4866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4867_cast_fp16 = softmax(axis = var_4697, x = aw_709_cast_fp16)[name = tensor<string, []>("op_4867_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4868_cast_fp16 = softmax(axis = var_4697, x = aw_711_cast_fp16)[name = tensor<string, []>("op_4868_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4869_cast_fp16 = softmax(axis = var_4697, x = aw_713_cast_fp16)[name = tensor<string, []>("op_4869_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4870_cast_fp16 = softmax(axis = var_4697, x = aw_715_cast_fp16)[name = tensor<string, []>("op_4870_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4871_cast_fp16 = softmax(axis = var_4697, x = aw_717_cast_fp16)[name = tensor<string, []>("op_4871_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4872_cast_fp16 = softmax(axis = var_4697, x = aw_719_cast_fp16)[name = tensor<string, []>("op_4872_cast_fp16")];
+            tensor<string, []> var_4874_equation_0 = const()[name = tensor<string, []>("op_4874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4874_cast_fp16 = einsum(equation = var_4874_equation_0, values = (var_4792_cast_fp16_0, var_4853_cast_fp16))[name = tensor<string, []>("op_4874_cast_fp16")];
+            tensor<string, []> var_4876_equation_0 = const()[name = tensor<string, []>("op_4876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4876_cast_fp16 = einsum(equation = var_4876_equation_0, values = (var_4792_cast_fp16_1, var_4854_cast_fp16))[name = tensor<string, []>("op_4876_cast_fp16")];
+            tensor<string, []> var_4878_equation_0 = const()[name = tensor<string, []>("op_4878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4878_cast_fp16 = einsum(equation = var_4878_equation_0, values = (var_4792_cast_fp16_2, var_4855_cast_fp16))[name = tensor<string, []>("op_4878_cast_fp16")];
+            tensor<string, []> var_4880_equation_0 = const()[name = tensor<string, []>("op_4880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4880_cast_fp16 = einsum(equation = var_4880_equation_0, values = (var_4792_cast_fp16_3, var_4856_cast_fp16))[name = tensor<string, []>("op_4880_cast_fp16")];
+            tensor<string, []> var_4882_equation_0 = const()[name = tensor<string, []>("op_4882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4882_cast_fp16 = einsum(equation = var_4882_equation_0, values = (var_4792_cast_fp16_4, var_4857_cast_fp16))[name = tensor<string, []>("op_4882_cast_fp16")];
+            tensor<string, []> var_4884_equation_0 = const()[name = tensor<string, []>("op_4884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4884_cast_fp16 = einsum(equation = var_4884_equation_0, values = (var_4792_cast_fp16_5, var_4858_cast_fp16))[name = tensor<string, []>("op_4884_cast_fp16")];
+            tensor<string, []> var_4886_equation_0 = const()[name = tensor<string, []>("op_4886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4886_cast_fp16 = einsum(equation = var_4886_equation_0, values = (var_4792_cast_fp16_6, var_4859_cast_fp16))[name = tensor<string, []>("op_4886_cast_fp16")];
+            tensor<string, []> var_4888_equation_0 = const()[name = tensor<string, []>("op_4888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4888_cast_fp16 = einsum(equation = var_4888_equation_0, values = (var_4792_cast_fp16_7, var_4860_cast_fp16))[name = tensor<string, []>("op_4888_cast_fp16")];
+            tensor<string, []> var_4890_equation_0 = const()[name = tensor<string, []>("op_4890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4890_cast_fp16 = einsum(equation = var_4890_equation_0, values = (var_4792_cast_fp16_8, var_4861_cast_fp16))[name = tensor<string, []>("op_4890_cast_fp16")];
+            tensor<string, []> var_4892_equation_0 = const()[name = tensor<string, []>("op_4892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4892_cast_fp16 = einsum(equation = var_4892_equation_0, values = (var_4792_cast_fp16_9, var_4862_cast_fp16))[name = tensor<string, []>("op_4892_cast_fp16")];
+            tensor<string, []> var_4894_equation_0 = const()[name = tensor<string, []>("op_4894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4894_cast_fp16 = einsum(equation = var_4894_equation_0, values = (var_4792_cast_fp16_10, var_4863_cast_fp16))[name = tensor<string, []>("op_4894_cast_fp16")];
+            tensor<string, []> var_4896_equation_0 = const()[name = tensor<string, []>("op_4896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4896_cast_fp16 = einsum(equation = var_4896_equation_0, values = (var_4792_cast_fp16_11, var_4864_cast_fp16))[name = tensor<string, []>("op_4896_cast_fp16")];
+            tensor<string, []> var_4898_equation_0 = const()[name = tensor<string, []>("op_4898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4898_cast_fp16 = einsum(equation = var_4898_equation_0, values = (var_4792_cast_fp16_12, var_4865_cast_fp16))[name = tensor<string, []>("op_4898_cast_fp16")];
+            tensor<string, []> var_4900_equation_0 = const()[name = tensor<string, []>("op_4900_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16 = einsum(equation = var_4900_equation_0, values = (var_4792_cast_fp16_13, var_4866_cast_fp16))[name = tensor<string, []>("op_4900_cast_fp16")];
+            tensor<string, []> var_4902_equation_0 = const()[name = tensor<string, []>("op_4902_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4902_cast_fp16 = einsum(equation = var_4902_equation_0, values = (var_4792_cast_fp16_14, var_4867_cast_fp16))[name = tensor<string, []>("op_4902_cast_fp16")];
+            tensor<string, []> var_4904_equation_0 = const()[name = tensor<string, []>("op_4904_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4904_cast_fp16 = einsum(equation = var_4904_equation_0, values = (var_4792_cast_fp16_15, var_4868_cast_fp16))[name = tensor<string, []>("op_4904_cast_fp16")];
+            tensor<string, []> var_4906_equation_0 = const()[name = tensor<string, []>("op_4906_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4906_cast_fp16 = einsum(equation = var_4906_equation_0, values = (var_4792_cast_fp16_16, var_4869_cast_fp16))[name = tensor<string, []>("op_4906_cast_fp16")];
+            tensor<string, []> var_4908_equation_0 = const()[name = tensor<string, []>("op_4908_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4908_cast_fp16 = einsum(equation = var_4908_equation_0, values = (var_4792_cast_fp16_17, var_4870_cast_fp16))[name = tensor<string, []>("op_4908_cast_fp16")];
+            tensor<string, []> var_4910_equation_0 = const()[name = tensor<string, []>("op_4910_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4910_cast_fp16 = einsum(equation = var_4910_equation_0, values = (var_4792_cast_fp16_18, var_4871_cast_fp16))[name = tensor<string, []>("op_4910_cast_fp16")];
+            tensor<string, []> var_4912_equation_0 = const()[name = tensor<string, []>("op_4912_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4912_cast_fp16 = einsum(equation = var_4912_equation_0, values = (var_4792_cast_fp16_19, var_4872_cast_fp16))[name = tensor<string, []>("op_4912_cast_fp16")];
+            tensor<bool, []> input_175_interleave_0 = const()[name = tensor<string, []>("input_175_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_175_cast_fp16 = concat(axis = var_4697, interleave = input_175_interleave_0, values = (var_4874_cast_fp16, var_4876_cast_fp16, var_4878_cast_fp16, var_4880_cast_fp16, var_4882_cast_fp16, var_4884_cast_fp16, var_4886_cast_fp16, var_4888_cast_fp16, var_4890_cast_fp16, var_4892_cast_fp16, var_4894_cast_fp16, var_4896_cast_fp16, var_4898_cast_fp16, var_4900_cast_fp16, var_4902_cast_fp16, var_4904_cast_fp16, var_4906_cast_fp16, var_4908_cast_fp16, var_4910_cast_fp16, var_4912_cast_fp16))[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<string, []> var_4921_pad_type_0 = const()[name = tensor<string, []>("op_4921_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4921_strides_0 = const()[name = tensor<string, []>("op_4921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4921_pad_0 = const()[name = tensor<string, []>("op_4921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4921_dilations_0 = const()[name = tensor<string, []>("op_4921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4921_groups_0 = const()[name = tensor<string, []>("op_4921_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693137152)))];
+            tensor<fp16, [1280]> blocks_17_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696414016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4921_cast_fp16 = conv(bias = blocks_17_attn_out_bias_to_fp16, dilations = var_4921_dilations_0, groups = var_4921_groups_0, pad = var_4921_pad_0, pad_type = var_4921_pad_type_0, strides = var_4921_strides_0, weight = blocks_17_attn_out_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("op_4921_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = var_4921_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_177_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_177_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696416640)))];
+            tensor<fp16, [1280]> input_177_beta_0_to_fp16 = const()[name = tensor<string, []>("input_177_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696419264)))];
+            tensor<fp16, []> var_4931_to_fp16 = const()[name = tensor<string, []>("op_4931_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = input_177_beta_0_to_fp16, epsilon = var_4931_to_fp16, gamma = input_177_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_17_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696421888)))];
+            tensor<fp16, [5120]> blocks_17_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709529152)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_179_cast_fp16 = conv(bias = blocks_17_mlp_0_bias_to_fp16, dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = blocks_17_mlp_0_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<string, []> var_4957_pad_type_0 = const()[name = tensor<string, []>("op_4957_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4957_strides_0 = const()[name = tensor<string, []>("op_4957_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4957_pad_0 = const()[name = tensor<string, []>("op_4957_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4957_dilations_0 = const()[name = tensor<string, []>("op_4957_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4957_groups_0 = const()[name = tensor<string, []>("op_4957_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_17_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709539456)))];
+            tensor<fp16, [1280]> blocks_17_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722646720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4957_cast_fp16 = conv(bias = blocks_17_mlp_2_bias_to_fp16, dilations = var_4957_dilations_0, groups = var_4957_groups_0, pad = var_4957_pad_0, pad_type = var_4957_pad_type_0, strides = var_4957_strides_0, weight = blocks_17_mlp_2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("op_4957_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_4957_cast_fp16)[name = tensor<string, []>("inputs_73_cast_fp16")];
+            tensor<int32, []> var_4966 = const()[name = tensor<string, []>("op_4966"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_183_axes_0 = const()[name = tensor<string, []>("input_183_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_183_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_183_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722649344)))];
+            tensor<fp16, [1280]> input_183_beta_0_to_fp16 = const()[name = tensor<string, []>("input_183_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722651968)))];
+            tensor<fp16, []> var_4982_to_fp16 = const()[name = tensor<string, []>("op_4982_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = input_183_beta_0_to_fp16, epsilon = var_4982_to_fp16, gamma = input_183_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<string, []> q_37_pad_type_0 = const()[name = tensor<string, []>("q_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_37_strides_0 = const()[name = tensor<string, []>("q_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_37_pad_0 = const()[name = tensor<string, []>("q_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_37_dilations_0 = const()[name = tensor<string, []>("q_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_37_groups_0 = const()[name = tensor<string, []>("q_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5017_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5017_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722654592)))];
+            tensor<fp16, [1280]> var_5017_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5017_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(725931456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5017_cast_fp16 = conv(bias = var_5017_bias_0_to_fp16, dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = var_5017_weight_0_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<string, []> k_37_pad_type_0 = const()[name = tensor<string, []>("k_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_37_strides_0 = const()[name = tensor<string, []>("k_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_37_pad_0 = const()[name = tensor<string, []>("k_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_37_dilations_0 = const()[name = tensor<string, []>("k_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_37_groups_0 = const()[name = tensor<string, []>("k_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(725934080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_37_cast_fp16 = conv(dilations = k_37_dilations_0, groups = k_37_groups_0, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = k_37_strides_0, weight = blocks_18_attn_key_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
+            tensor<string, []> var_5015_pad_type_0 = const()[name = tensor<string, []>("op_5015_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5015_strides_0 = const()[name = tensor<string, []>("op_5015_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5015_pad_0 = const()[name = tensor<string, []>("op_5015_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5015_dilations_0 = const()[name = tensor<string, []>("op_5015_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5015_groups_0 = const()[name = tensor<string, []>("op_5015_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(729210944)))];
+            tensor<fp16, [1280]> blocks_18_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732487808)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5015_cast_fp16 = conv(bias = blocks_18_attn_value_bias_to_fp16, dilations = var_5015_dilations_0, groups = var_5015_groups_0, pad = var_5015_pad_0, pad_type = var_5015_pad_type_0, strides = var_5015_strides_0, weight = blocks_18_attn_value_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5015_cast_fp16")];
+            tensor<int32, [20]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5018_axis_0 = const()[name = tensor<string, []>("op_5018_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_19 = split(axis = var_5018_axis_0, split_sizes = tile_54, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5018_cast_fp16")];
+            tensor<int32, [4]> var_5039_perm_0 = const()[name = tensor<string, []>("op_5039_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_55 = const()[name = tensor<string, []>("tile_55"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5040_axis_0 = const()[name = tensor<string, []>("op_5040_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5039_cast_fp16 = transpose(perm = var_5039_perm_0, x = k_37_cast_fp16)[name = tensor<string, []>("transpose_14")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_19 = split(axis = var_5040_axis_0, split_sizes = tile_55, x = var_5039_cast_fp16)[name = tensor<string, []>("op_5040_cast_fp16")];
+            tensor<int32, [20]> tile_56 = const()[name = tensor<string, []>("tile_56"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5061_axis_0 = const()[name = tensor<string, []>("op_5061_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_19 = split(axis = var_5061_axis_0, split_sizes = tile_56, x = var_5015_cast_fp16)[name = tensor<string, []>("op_5061_cast_fp16")];
+            tensor<string, []> aw_721_equation_0 = const()[name = tensor<string, []>("aw_721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_721_cast_fp16 = einsum(equation = aw_721_equation_0, values = (var_5040_cast_fp16_0, var_5018_cast_fp16_0))[name = tensor<string, []>("aw_721_cast_fp16")];
+            tensor<string, []> aw_723_equation_0 = const()[name = tensor<string, []>("aw_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_723_cast_fp16 = einsum(equation = aw_723_equation_0, values = (var_5040_cast_fp16_1, var_5018_cast_fp16_1))[name = tensor<string, []>("aw_723_cast_fp16")];
+            tensor<string, []> aw_725_equation_0 = const()[name = tensor<string, []>("aw_725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_725_cast_fp16 = einsum(equation = aw_725_equation_0, values = (var_5040_cast_fp16_2, var_5018_cast_fp16_2))[name = tensor<string, []>("aw_725_cast_fp16")];
+            tensor<string, []> aw_727_equation_0 = const()[name = tensor<string, []>("aw_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_727_cast_fp16 = einsum(equation = aw_727_equation_0, values = (var_5040_cast_fp16_3, var_5018_cast_fp16_3))[name = tensor<string, []>("aw_727_cast_fp16")];
+            tensor<string, []> aw_729_equation_0 = const()[name = tensor<string, []>("aw_729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_729_cast_fp16 = einsum(equation = aw_729_equation_0, values = (var_5040_cast_fp16_4, var_5018_cast_fp16_4))[name = tensor<string, []>("aw_729_cast_fp16")];
+            tensor<string, []> aw_731_equation_0 = const()[name = tensor<string, []>("aw_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_731_cast_fp16 = einsum(equation = aw_731_equation_0, values = (var_5040_cast_fp16_5, var_5018_cast_fp16_5))[name = tensor<string, []>("aw_731_cast_fp16")];
+            tensor<string, []> aw_733_equation_0 = const()[name = tensor<string, []>("aw_733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_733_cast_fp16 = einsum(equation = aw_733_equation_0, values = (var_5040_cast_fp16_6, var_5018_cast_fp16_6))[name = tensor<string, []>("aw_733_cast_fp16")];
+            tensor<string, []> aw_735_equation_0 = const()[name = tensor<string, []>("aw_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_735_cast_fp16 = einsum(equation = aw_735_equation_0, values = (var_5040_cast_fp16_7, var_5018_cast_fp16_7))[name = tensor<string, []>("aw_735_cast_fp16")];
+            tensor<string, []> aw_737_equation_0 = const()[name = tensor<string, []>("aw_737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_737_cast_fp16 = einsum(equation = aw_737_equation_0, values = (var_5040_cast_fp16_8, var_5018_cast_fp16_8))[name = tensor<string, []>("aw_737_cast_fp16")];
+            tensor<string, []> aw_739_equation_0 = const()[name = tensor<string, []>("aw_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_739_cast_fp16 = einsum(equation = aw_739_equation_0, values = (var_5040_cast_fp16_9, var_5018_cast_fp16_9))[name = tensor<string, []>("aw_739_cast_fp16")];
+            tensor<string, []> aw_741_equation_0 = const()[name = tensor<string, []>("aw_741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_741_cast_fp16 = einsum(equation = aw_741_equation_0, values = (var_5040_cast_fp16_10, var_5018_cast_fp16_10))[name = tensor<string, []>("aw_741_cast_fp16")];
+            tensor<string, []> aw_743_equation_0 = const()[name = tensor<string, []>("aw_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_743_cast_fp16 = einsum(equation = aw_743_equation_0, values = (var_5040_cast_fp16_11, var_5018_cast_fp16_11))[name = tensor<string, []>("aw_743_cast_fp16")];
+            tensor<string, []> aw_745_equation_0 = const()[name = tensor<string, []>("aw_745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_745_cast_fp16 = einsum(equation = aw_745_equation_0, values = (var_5040_cast_fp16_12, var_5018_cast_fp16_12))[name = tensor<string, []>("aw_745_cast_fp16")];
+            tensor<string, []> aw_747_equation_0 = const()[name = tensor<string, []>("aw_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_747_cast_fp16 = einsum(equation = aw_747_equation_0, values = (var_5040_cast_fp16_13, var_5018_cast_fp16_13))[name = tensor<string, []>("aw_747_cast_fp16")];
+            tensor<string, []> aw_749_equation_0 = const()[name = tensor<string, []>("aw_749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_749_cast_fp16 = einsum(equation = aw_749_equation_0, values = (var_5040_cast_fp16_14, var_5018_cast_fp16_14))[name = tensor<string, []>("aw_749_cast_fp16")];
+            tensor<string, []> aw_751_equation_0 = const()[name = tensor<string, []>("aw_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_751_cast_fp16 = einsum(equation = aw_751_equation_0, values = (var_5040_cast_fp16_15, var_5018_cast_fp16_15))[name = tensor<string, []>("aw_751_cast_fp16")];
+            tensor<string, []> aw_753_equation_0 = const()[name = tensor<string, []>("aw_753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_753_cast_fp16 = einsum(equation = aw_753_equation_0, values = (var_5040_cast_fp16_16, var_5018_cast_fp16_16))[name = tensor<string, []>("aw_753_cast_fp16")];
+            tensor<string, []> aw_755_equation_0 = const()[name = tensor<string, []>("aw_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_755_cast_fp16 = einsum(equation = aw_755_equation_0, values = (var_5040_cast_fp16_17, var_5018_cast_fp16_17))[name = tensor<string, []>("aw_755_cast_fp16")];
+            tensor<string, []> aw_757_equation_0 = const()[name = tensor<string, []>("aw_757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_757_cast_fp16 = einsum(equation = aw_757_equation_0, values = (var_5040_cast_fp16_18, var_5018_cast_fp16_18))[name = tensor<string, []>("aw_757_cast_fp16")];
+            tensor<string, []> aw_759_equation_0 = const()[name = tensor<string, []>("aw_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_759_cast_fp16 = einsum(equation = aw_759_equation_0, values = (var_5040_cast_fp16_19, var_5018_cast_fp16_19))[name = tensor<string, []>("aw_759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5122_cast_fp16 = softmax(axis = var_4966, x = aw_721_cast_fp16)[name = tensor<string, []>("op_5122_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5123_cast_fp16 = softmax(axis = var_4966, x = aw_723_cast_fp16)[name = tensor<string, []>("op_5123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5124_cast_fp16 = softmax(axis = var_4966, x = aw_725_cast_fp16)[name = tensor<string, []>("op_5124_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5125_cast_fp16 = softmax(axis = var_4966, x = aw_727_cast_fp16)[name = tensor<string, []>("op_5125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5126_cast_fp16 = softmax(axis = var_4966, x = aw_729_cast_fp16)[name = tensor<string, []>("op_5126_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5127_cast_fp16 = softmax(axis = var_4966, x = aw_731_cast_fp16)[name = tensor<string, []>("op_5127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5128_cast_fp16 = softmax(axis = var_4966, x = aw_733_cast_fp16)[name = tensor<string, []>("op_5128_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5129_cast_fp16 = softmax(axis = var_4966, x = aw_735_cast_fp16)[name = tensor<string, []>("op_5129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5130_cast_fp16 = softmax(axis = var_4966, x = aw_737_cast_fp16)[name = tensor<string, []>("op_5130_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5131_cast_fp16 = softmax(axis = var_4966, x = aw_739_cast_fp16)[name = tensor<string, []>("op_5131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5132_cast_fp16 = softmax(axis = var_4966, x = aw_741_cast_fp16)[name = tensor<string, []>("op_5132_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5133_cast_fp16 = softmax(axis = var_4966, x = aw_743_cast_fp16)[name = tensor<string, []>("op_5133_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5134_cast_fp16 = softmax(axis = var_4966, x = aw_745_cast_fp16)[name = tensor<string, []>("op_5134_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5135_cast_fp16 = softmax(axis = var_4966, x = aw_747_cast_fp16)[name = tensor<string, []>("op_5135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5136_cast_fp16 = softmax(axis = var_4966, x = aw_749_cast_fp16)[name = tensor<string, []>("op_5136_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5137_cast_fp16 = softmax(axis = var_4966, x = aw_751_cast_fp16)[name = tensor<string, []>("op_5137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5138_cast_fp16 = softmax(axis = var_4966, x = aw_753_cast_fp16)[name = tensor<string, []>("op_5138_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5139_cast_fp16 = softmax(axis = var_4966, x = aw_755_cast_fp16)[name = tensor<string, []>("op_5139_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5140_cast_fp16 = softmax(axis = var_4966, x = aw_757_cast_fp16)[name = tensor<string, []>("op_5140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5141_cast_fp16 = softmax(axis = var_4966, x = aw_759_cast_fp16)[name = tensor<string, []>("op_5141_cast_fp16")];
+            tensor<string, []> var_5143_equation_0 = const()[name = tensor<string, []>("op_5143_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5143_cast_fp16 = einsum(equation = var_5143_equation_0, values = (var_5061_cast_fp16_0, var_5122_cast_fp16))[name = tensor<string, []>("op_5143_cast_fp16")];
+            tensor<string, []> var_5145_equation_0 = const()[name = tensor<string, []>("op_5145_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5145_cast_fp16 = einsum(equation = var_5145_equation_0, values = (var_5061_cast_fp16_1, var_5123_cast_fp16))[name = tensor<string, []>("op_5145_cast_fp16")];
+            tensor<string, []> var_5147_equation_0 = const()[name = tensor<string, []>("op_5147_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5147_cast_fp16 = einsum(equation = var_5147_equation_0, values = (var_5061_cast_fp16_2, var_5124_cast_fp16))[name = tensor<string, []>("op_5147_cast_fp16")];
+            tensor<string, []> var_5149_equation_0 = const()[name = tensor<string, []>("op_5149_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5149_cast_fp16 = einsum(equation = var_5149_equation_0, values = (var_5061_cast_fp16_3, var_5125_cast_fp16))[name = tensor<string, []>("op_5149_cast_fp16")];
+            tensor<string, []> var_5151_equation_0 = const()[name = tensor<string, []>("op_5151_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5151_cast_fp16 = einsum(equation = var_5151_equation_0, values = (var_5061_cast_fp16_4, var_5126_cast_fp16))[name = tensor<string, []>("op_5151_cast_fp16")];
+            tensor<string, []> var_5153_equation_0 = const()[name = tensor<string, []>("op_5153_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5153_cast_fp16 = einsum(equation = var_5153_equation_0, values = (var_5061_cast_fp16_5, var_5127_cast_fp16))[name = tensor<string, []>("op_5153_cast_fp16")];
+            tensor<string, []> var_5155_equation_0 = const()[name = tensor<string, []>("op_5155_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5155_cast_fp16 = einsum(equation = var_5155_equation_0, values = (var_5061_cast_fp16_6, var_5128_cast_fp16))[name = tensor<string, []>("op_5155_cast_fp16")];
+            tensor<string, []> var_5157_equation_0 = const()[name = tensor<string, []>("op_5157_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5157_cast_fp16 = einsum(equation = var_5157_equation_0, values = (var_5061_cast_fp16_7, var_5129_cast_fp16))[name = tensor<string, []>("op_5157_cast_fp16")];
+            tensor<string, []> var_5159_equation_0 = const()[name = tensor<string, []>("op_5159_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5159_cast_fp16 = einsum(equation = var_5159_equation_0, values = (var_5061_cast_fp16_8, var_5130_cast_fp16))[name = tensor<string, []>("op_5159_cast_fp16")];
+            tensor<string, []> var_5161_equation_0 = const()[name = tensor<string, []>("op_5161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5161_cast_fp16 = einsum(equation = var_5161_equation_0, values = (var_5061_cast_fp16_9, var_5131_cast_fp16))[name = tensor<string, []>("op_5161_cast_fp16")];
+            tensor<string, []> var_5163_equation_0 = const()[name = tensor<string, []>("op_5163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5163_cast_fp16 = einsum(equation = var_5163_equation_0, values = (var_5061_cast_fp16_10, var_5132_cast_fp16))[name = tensor<string, []>("op_5163_cast_fp16")];
+            tensor<string, []> var_5165_equation_0 = const()[name = tensor<string, []>("op_5165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5165_cast_fp16 = einsum(equation = var_5165_equation_0, values = (var_5061_cast_fp16_11, var_5133_cast_fp16))[name = tensor<string, []>("op_5165_cast_fp16")];
+            tensor<string, []> var_5167_equation_0 = const()[name = tensor<string, []>("op_5167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5167_cast_fp16 = einsum(equation = var_5167_equation_0, values = (var_5061_cast_fp16_12, var_5134_cast_fp16))[name = tensor<string, []>("op_5167_cast_fp16")];
+            tensor<string, []> var_5169_equation_0 = const()[name = tensor<string, []>("op_5169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5169_cast_fp16 = einsum(equation = var_5169_equation_0, values = (var_5061_cast_fp16_13, var_5135_cast_fp16))[name = tensor<string, []>("op_5169_cast_fp16")];
+            tensor<string, []> var_5171_equation_0 = const()[name = tensor<string, []>("op_5171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5171_cast_fp16 = einsum(equation = var_5171_equation_0, values = (var_5061_cast_fp16_14, var_5136_cast_fp16))[name = tensor<string, []>("op_5171_cast_fp16")];
+            tensor<string, []> var_5173_equation_0 = const()[name = tensor<string, []>("op_5173_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5173_cast_fp16 = einsum(equation = var_5173_equation_0, values = (var_5061_cast_fp16_15, var_5137_cast_fp16))[name = tensor<string, []>("op_5173_cast_fp16")];
+            tensor<string, []> var_5175_equation_0 = const()[name = tensor<string, []>("op_5175_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5175_cast_fp16 = einsum(equation = var_5175_equation_0, values = (var_5061_cast_fp16_16, var_5138_cast_fp16))[name = tensor<string, []>("op_5175_cast_fp16")];
+            tensor<string, []> var_5177_equation_0 = const()[name = tensor<string, []>("op_5177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5177_cast_fp16 = einsum(equation = var_5177_equation_0, values = (var_5061_cast_fp16_17, var_5139_cast_fp16))[name = tensor<string, []>("op_5177_cast_fp16")];
+            tensor<string, []> var_5179_equation_0 = const()[name = tensor<string, []>("op_5179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5179_cast_fp16 = einsum(equation = var_5179_equation_0, values = (var_5061_cast_fp16_18, var_5140_cast_fp16))[name = tensor<string, []>("op_5179_cast_fp16")];
+            tensor<string, []> var_5181_equation_0 = const()[name = tensor<string, []>("op_5181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5181_cast_fp16 = einsum(equation = var_5181_equation_0, values = (var_5061_cast_fp16_19, var_5141_cast_fp16))[name = tensor<string, []>("op_5181_cast_fp16")];
+            tensor<bool, []> input_185_interleave_0 = const()[name = tensor<string, []>("input_185_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_185_cast_fp16 = concat(axis = var_4966, interleave = input_185_interleave_0, values = (var_5143_cast_fp16, var_5145_cast_fp16, var_5147_cast_fp16, var_5149_cast_fp16, var_5151_cast_fp16, var_5153_cast_fp16, var_5155_cast_fp16, var_5157_cast_fp16, var_5159_cast_fp16, var_5161_cast_fp16, var_5163_cast_fp16, var_5165_cast_fp16, var_5167_cast_fp16, var_5169_cast_fp16, var_5171_cast_fp16, var_5173_cast_fp16, var_5175_cast_fp16, var_5177_cast_fp16, var_5179_cast_fp16, var_5181_cast_fp16))[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<string, []> var_5190_pad_type_0 = const()[name = tensor<string, []>("op_5190_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5190_strides_0 = const()[name = tensor<string, []>("op_5190_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5190_pad_0 = const()[name = tensor<string, []>("op_5190_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5190_dilations_0 = const()[name = tensor<string, []>("op_5190_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5190_groups_0 = const()[name = tensor<string, []>("op_5190_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732490432)))];
+            tensor<fp16, [1280]> blocks_18_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735767296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5190_cast_fp16 = conv(bias = blocks_18_attn_out_bias_to_fp16, dilations = var_5190_dilations_0, groups = var_5190_groups_0, pad = var_5190_pad_0, pad_type = var_5190_pad_type_0, strides = var_5190_strides_0, weight = blocks_18_attn_out_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("op_5190_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = var_5190_cast_fp16)[name = tensor<string, []>("inputs_75_cast_fp16")];
+            tensor<int32, [1]> input_187_axes_0 = const()[name = tensor<string, []>("input_187_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_187_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735769920)))];
+            tensor<fp16, [1280]> input_187_beta_0_to_fp16 = const()[name = tensor<string, []>("input_187_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735772544)))];
+            tensor<fp16, []> var_5200_to_fp16 = const()[name = tensor<string, []>("op_5200_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_187_cast_fp16 = layer_norm(axes = input_187_axes_0, beta = input_187_beta_0_to_fp16, epsilon = var_5200_to_fp16, gamma = input_187_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
+            tensor<string, []> input_189_pad_type_0 = const()[name = tensor<string, []>("input_189_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_189_strides_0 = const()[name = tensor<string, []>("input_189_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_189_pad_0 = const()[name = tensor<string, []>("input_189_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_189_dilations_0 = const()[name = tensor<string, []>("input_189_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_189_groups_0 = const()[name = tensor<string, []>("input_189_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_18_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735775168)))];
+            tensor<fp16, [5120]> blocks_18_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(748882432)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_189_cast_fp16 = conv(bias = blocks_18_mlp_0_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = blocks_18_mlp_0_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
+            tensor<string, []> var_5226_pad_type_0 = const()[name = tensor<string, []>("op_5226_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5226_strides_0 = const()[name = tensor<string, []>("op_5226_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5226_pad_0 = const()[name = tensor<string, []>("op_5226_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5226_dilations_0 = const()[name = tensor<string, []>("op_5226_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5226_groups_0 = const()[name = tensor<string, []>("op_5226_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_18_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(748892736)))];
+            tensor<fp16, [1280]> blocks_18_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762000000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5226_cast_fp16 = conv(bias = blocks_18_mlp_2_bias_to_fp16, dilations = var_5226_dilations_0, groups = var_5226_groups_0, pad = var_5226_pad_0, pad_type = var_5226_pad_type_0, strides = var_5226_strides_0, weight = blocks_18_mlp_2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("op_5226_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = var_5226_cast_fp16)[name = tensor<string, []>("inputs_77_cast_fp16")];
+            tensor<int32, []> var_5235 = const()[name = tensor<string, []>("op_5235"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_193_axes_0 = const()[name = tensor<string, []>("input_193_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_193_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_193_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762002624)))];
+            tensor<fp16, [1280]> input_193_beta_0_to_fp16 = const()[name = tensor<string, []>("input_193_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762005248)))];
+            tensor<fp16, []> var_5251_to_fp16 = const()[name = tensor<string, []>("op_5251_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_193_cast_fp16 = layer_norm(axes = input_193_axes_0, beta = input_193_beta_0_to_fp16, epsilon = var_5251_to_fp16, gamma = input_193_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
+            tensor<string, []> q_39_pad_type_0 = const()[name = tensor<string, []>("q_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_39_strides_0 = const()[name = tensor<string, []>("q_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_39_pad_0 = const()[name = tensor<string, []>("q_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_39_dilations_0 = const()[name = tensor<string, []>("q_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_39_groups_0 = const()[name = tensor<string, []>("q_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5286_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5286_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762007872)))];
+            tensor<fp16, [1280]> var_5286_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5286_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765284736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5286_cast_fp16 = conv(bias = var_5286_bias_0_to_fp16, dilations = q_39_dilations_0, groups = q_39_groups_0, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = q_39_strides_0, weight = var_5286_weight_0_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5286_cast_fp16")];
+            tensor<string, []> k_39_pad_type_0 = const()[name = tensor<string, []>("k_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_39_strides_0 = const()[name = tensor<string, []>("k_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_39_pad_0 = const()[name = tensor<string, []>("k_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_39_dilations_0 = const()[name = tensor<string, []>("k_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_39_groups_0 = const()[name = tensor<string, []>("k_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765287360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_39_cast_fp16 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = blocks_19_attn_key_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("k_39_cast_fp16")];
+            tensor<string, []> var_5284_pad_type_0 = const()[name = tensor<string, []>("op_5284_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5284_strides_0 = const()[name = tensor<string, []>("op_5284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5284_pad_0 = const()[name = tensor<string, []>("op_5284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5284_dilations_0 = const()[name = tensor<string, []>("op_5284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5284_groups_0 = const()[name = tensor<string, []>("op_5284_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(768564224)))];
+            tensor<fp16, [1280]> blocks_19_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(771841088)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5284_cast_fp16 = conv(bias = blocks_19_attn_value_bias_to_fp16, dilations = var_5284_dilations_0, groups = var_5284_groups_0, pad = var_5284_pad_0, pad_type = var_5284_pad_type_0, strides = var_5284_strides_0, weight = blocks_19_attn_value_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5284_cast_fp16")];
+            tensor<int32, [20]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5287_axis_0 = const()[name = tensor<string, []>("op_5287_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_19 = split(axis = var_5287_axis_0, split_sizes = tile_57, x = var_5286_cast_fp16)[name = tensor<string, []>("op_5287_cast_fp16")];
+            tensor<int32, [4]> var_5308_perm_0 = const()[name = tensor<string, []>("op_5308_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5309_axis_0 = const()[name = tensor<string, []>("op_5309_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5308_cast_fp16 = transpose(perm = var_5308_perm_0, x = k_39_cast_fp16)[name = tensor<string, []>("transpose_13")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_19 = split(axis = var_5309_axis_0, split_sizes = tile_58, x = var_5308_cast_fp16)[name = tensor<string, []>("op_5309_cast_fp16")];
+            tensor<int32, [20]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5330_axis_0 = const()[name = tensor<string, []>("op_5330_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_19 = split(axis = var_5330_axis_0, split_sizes = tile_59, x = var_5284_cast_fp16)[name = tensor<string, []>("op_5330_cast_fp16")];
+            tensor<string, []> aw_761_equation_0 = const()[name = tensor<string, []>("aw_761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_761_cast_fp16 = einsum(equation = aw_761_equation_0, values = (var_5309_cast_fp16_0, var_5287_cast_fp16_0))[name = tensor<string, []>("aw_761_cast_fp16")];
+            tensor<string, []> aw_763_equation_0 = const()[name = tensor<string, []>("aw_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_763_cast_fp16 = einsum(equation = aw_763_equation_0, values = (var_5309_cast_fp16_1, var_5287_cast_fp16_1))[name = tensor<string, []>("aw_763_cast_fp16")];
+            tensor<string, []> aw_765_equation_0 = const()[name = tensor<string, []>("aw_765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_765_cast_fp16 = einsum(equation = aw_765_equation_0, values = (var_5309_cast_fp16_2, var_5287_cast_fp16_2))[name = tensor<string, []>("aw_765_cast_fp16")];
+            tensor<string, []> aw_767_equation_0 = const()[name = tensor<string, []>("aw_767_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_767_cast_fp16 = einsum(equation = aw_767_equation_0, values = (var_5309_cast_fp16_3, var_5287_cast_fp16_3))[name = tensor<string, []>("aw_767_cast_fp16")];
+            tensor<string, []> aw_769_equation_0 = const()[name = tensor<string, []>("aw_769_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_769_cast_fp16 = einsum(equation = aw_769_equation_0, values = (var_5309_cast_fp16_4, var_5287_cast_fp16_4))[name = tensor<string, []>("aw_769_cast_fp16")];
+            tensor<string, []> aw_771_equation_0 = const()[name = tensor<string, []>("aw_771_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_771_cast_fp16 = einsum(equation = aw_771_equation_0, values = (var_5309_cast_fp16_5, var_5287_cast_fp16_5))[name = tensor<string, []>("aw_771_cast_fp16")];
+            tensor<string, []> aw_773_equation_0 = const()[name = tensor<string, []>("aw_773_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_773_cast_fp16 = einsum(equation = aw_773_equation_0, values = (var_5309_cast_fp16_6, var_5287_cast_fp16_6))[name = tensor<string, []>("aw_773_cast_fp16")];
+            tensor<string, []> aw_775_equation_0 = const()[name = tensor<string, []>("aw_775_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_775_cast_fp16 = einsum(equation = aw_775_equation_0, values = (var_5309_cast_fp16_7, var_5287_cast_fp16_7))[name = tensor<string, []>("aw_775_cast_fp16")];
+            tensor<string, []> aw_777_equation_0 = const()[name = tensor<string, []>("aw_777_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_777_cast_fp16 = einsum(equation = aw_777_equation_0, values = (var_5309_cast_fp16_8, var_5287_cast_fp16_8))[name = tensor<string, []>("aw_777_cast_fp16")];
+            tensor<string, []> aw_779_equation_0 = const()[name = tensor<string, []>("aw_779_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_779_cast_fp16 = einsum(equation = aw_779_equation_0, values = (var_5309_cast_fp16_9, var_5287_cast_fp16_9))[name = tensor<string, []>("aw_779_cast_fp16")];
+            tensor<string, []> aw_781_equation_0 = const()[name = tensor<string, []>("aw_781_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_781_cast_fp16 = einsum(equation = aw_781_equation_0, values = (var_5309_cast_fp16_10, var_5287_cast_fp16_10))[name = tensor<string, []>("aw_781_cast_fp16")];
+            tensor<string, []> aw_783_equation_0 = const()[name = tensor<string, []>("aw_783_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_783_cast_fp16 = einsum(equation = aw_783_equation_0, values = (var_5309_cast_fp16_11, var_5287_cast_fp16_11))[name = tensor<string, []>("aw_783_cast_fp16")];
+            tensor<string, []> aw_785_equation_0 = const()[name = tensor<string, []>("aw_785_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_785_cast_fp16 = einsum(equation = aw_785_equation_0, values = (var_5309_cast_fp16_12, var_5287_cast_fp16_12))[name = tensor<string, []>("aw_785_cast_fp16")];
+            tensor<string, []> aw_787_equation_0 = const()[name = tensor<string, []>("aw_787_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_787_cast_fp16 = einsum(equation = aw_787_equation_0, values = (var_5309_cast_fp16_13, var_5287_cast_fp16_13))[name = tensor<string, []>("aw_787_cast_fp16")];
+            tensor<string, []> aw_789_equation_0 = const()[name = tensor<string, []>("aw_789_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_789_cast_fp16 = einsum(equation = aw_789_equation_0, values = (var_5309_cast_fp16_14, var_5287_cast_fp16_14))[name = tensor<string, []>("aw_789_cast_fp16")];
+            tensor<string, []> aw_791_equation_0 = const()[name = tensor<string, []>("aw_791_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_791_cast_fp16 = einsum(equation = aw_791_equation_0, values = (var_5309_cast_fp16_15, var_5287_cast_fp16_15))[name = tensor<string, []>("aw_791_cast_fp16")];
+            tensor<string, []> aw_793_equation_0 = const()[name = tensor<string, []>("aw_793_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_793_cast_fp16 = einsum(equation = aw_793_equation_0, values = (var_5309_cast_fp16_16, var_5287_cast_fp16_16))[name = tensor<string, []>("aw_793_cast_fp16")];
+            tensor<string, []> aw_795_equation_0 = const()[name = tensor<string, []>("aw_795_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_795_cast_fp16 = einsum(equation = aw_795_equation_0, values = (var_5309_cast_fp16_17, var_5287_cast_fp16_17))[name = tensor<string, []>("aw_795_cast_fp16")];
+            tensor<string, []> aw_797_equation_0 = const()[name = tensor<string, []>("aw_797_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_797_cast_fp16 = einsum(equation = aw_797_equation_0, values = (var_5309_cast_fp16_18, var_5287_cast_fp16_18))[name = tensor<string, []>("aw_797_cast_fp16")];
+            tensor<string, []> aw_799_equation_0 = const()[name = tensor<string, []>("aw_799_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_799_cast_fp16 = einsum(equation = aw_799_equation_0, values = (var_5309_cast_fp16_19, var_5287_cast_fp16_19))[name = tensor<string, []>("aw_799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5391_cast_fp16 = softmax(axis = var_5235, x = aw_761_cast_fp16)[name = tensor<string, []>("op_5391_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5392_cast_fp16 = softmax(axis = var_5235, x = aw_763_cast_fp16)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5393_cast_fp16 = softmax(axis = var_5235, x = aw_765_cast_fp16)[name = tensor<string, []>("op_5393_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5394_cast_fp16 = softmax(axis = var_5235, x = aw_767_cast_fp16)[name = tensor<string, []>("op_5394_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5395_cast_fp16 = softmax(axis = var_5235, x = aw_769_cast_fp16)[name = tensor<string, []>("op_5395_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5396_cast_fp16 = softmax(axis = var_5235, x = aw_771_cast_fp16)[name = tensor<string, []>("op_5396_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5397_cast_fp16 = softmax(axis = var_5235, x = aw_773_cast_fp16)[name = tensor<string, []>("op_5397_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5398_cast_fp16 = softmax(axis = var_5235, x = aw_775_cast_fp16)[name = tensor<string, []>("op_5398_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5399_cast_fp16 = softmax(axis = var_5235, x = aw_777_cast_fp16)[name = tensor<string, []>("op_5399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5400_cast_fp16 = softmax(axis = var_5235, x = aw_779_cast_fp16)[name = tensor<string, []>("op_5400_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5401_cast_fp16 = softmax(axis = var_5235, x = aw_781_cast_fp16)[name = tensor<string, []>("op_5401_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5402_cast_fp16 = softmax(axis = var_5235, x = aw_783_cast_fp16)[name = tensor<string, []>("op_5402_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5403_cast_fp16 = softmax(axis = var_5235, x = aw_785_cast_fp16)[name = tensor<string, []>("op_5403_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5404_cast_fp16 = softmax(axis = var_5235, x = aw_787_cast_fp16)[name = tensor<string, []>("op_5404_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5405_cast_fp16 = softmax(axis = var_5235, x = aw_789_cast_fp16)[name = tensor<string, []>("op_5405_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5406_cast_fp16 = softmax(axis = var_5235, x = aw_791_cast_fp16)[name = tensor<string, []>("op_5406_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5407_cast_fp16 = softmax(axis = var_5235, x = aw_793_cast_fp16)[name = tensor<string, []>("op_5407_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5408_cast_fp16 = softmax(axis = var_5235, x = aw_795_cast_fp16)[name = tensor<string, []>("op_5408_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5409_cast_fp16 = softmax(axis = var_5235, x = aw_797_cast_fp16)[name = tensor<string, []>("op_5409_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5410_cast_fp16 = softmax(axis = var_5235, x = aw_799_cast_fp16)[name = tensor<string, []>("op_5410_cast_fp16")];
+            tensor<string, []> var_5412_equation_0 = const()[name = tensor<string, []>("op_5412_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5412_cast_fp16 = einsum(equation = var_5412_equation_0, values = (var_5330_cast_fp16_0, var_5391_cast_fp16))[name = tensor<string, []>("op_5412_cast_fp16")];
+            tensor<string, []> var_5414_equation_0 = const()[name = tensor<string, []>("op_5414_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5414_cast_fp16 = einsum(equation = var_5414_equation_0, values = (var_5330_cast_fp16_1, var_5392_cast_fp16))[name = tensor<string, []>("op_5414_cast_fp16")];
+            tensor<string, []> var_5416_equation_0 = const()[name = tensor<string, []>("op_5416_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5416_cast_fp16 = einsum(equation = var_5416_equation_0, values = (var_5330_cast_fp16_2, var_5393_cast_fp16))[name = tensor<string, []>("op_5416_cast_fp16")];
+            tensor<string, []> var_5418_equation_0 = const()[name = tensor<string, []>("op_5418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5418_cast_fp16 = einsum(equation = var_5418_equation_0, values = (var_5330_cast_fp16_3, var_5394_cast_fp16))[name = tensor<string, []>("op_5418_cast_fp16")];
+            tensor<string, []> var_5420_equation_0 = const()[name = tensor<string, []>("op_5420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5420_cast_fp16 = einsum(equation = var_5420_equation_0, values = (var_5330_cast_fp16_4, var_5395_cast_fp16))[name = tensor<string, []>("op_5420_cast_fp16")];
+            tensor<string, []> var_5422_equation_0 = const()[name = tensor<string, []>("op_5422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5422_cast_fp16 = einsum(equation = var_5422_equation_0, values = (var_5330_cast_fp16_5, var_5396_cast_fp16))[name = tensor<string, []>("op_5422_cast_fp16")];
+            tensor<string, []> var_5424_equation_0 = const()[name = tensor<string, []>("op_5424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5424_cast_fp16 = einsum(equation = var_5424_equation_0, values = (var_5330_cast_fp16_6, var_5397_cast_fp16))[name = tensor<string, []>("op_5424_cast_fp16")];
+            tensor<string, []> var_5426_equation_0 = const()[name = tensor<string, []>("op_5426_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5426_cast_fp16 = einsum(equation = var_5426_equation_0, values = (var_5330_cast_fp16_7, var_5398_cast_fp16))[name = tensor<string, []>("op_5426_cast_fp16")];
+            tensor<string, []> var_5428_equation_0 = const()[name = tensor<string, []>("op_5428_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5428_cast_fp16 = einsum(equation = var_5428_equation_0, values = (var_5330_cast_fp16_8, var_5399_cast_fp16))[name = tensor<string, []>("op_5428_cast_fp16")];
+            tensor<string, []> var_5430_equation_0 = const()[name = tensor<string, []>("op_5430_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5430_cast_fp16 = einsum(equation = var_5430_equation_0, values = (var_5330_cast_fp16_9, var_5400_cast_fp16))[name = tensor<string, []>("op_5430_cast_fp16")];
+            tensor<string, []> var_5432_equation_0 = const()[name = tensor<string, []>("op_5432_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5432_cast_fp16 = einsum(equation = var_5432_equation_0, values = (var_5330_cast_fp16_10, var_5401_cast_fp16))[name = tensor<string, []>("op_5432_cast_fp16")];
+            tensor<string, []> var_5434_equation_0 = const()[name = tensor<string, []>("op_5434_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5434_cast_fp16 = einsum(equation = var_5434_equation_0, values = (var_5330_cast_fp16_11, var_5402_cast_fp16))[name = tensor<string, []>("op_5434_cast_fp16")];
+            tensor<string, []> var_5436_equation_0 = const()[name = tensor<string, []>("op_5436_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5436_cast_fp16 = einsum(equation = var_5436_equation_0, values = (var_5330_cast_fp16_12, var_5403_cast_fp16))[name = tensor<string, []>("op_5436_cast_fp16")];
+            tensor<string, []> var_5438_equation_0 = const()[name = tensor<string, []>("op_5438_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5438_cast_fp16 = einsum(equation = var_5438_equation_0, values = (var_5330_cast_fp16_13, var_5404_cast_fp16))[name = tensor<string, []>("op_5438_cast_fp16")];
+            tensor<string, []> var_5440_equation_0 = const()[name = tensor<string, []>("op_5440_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5440_cast_fp16 = einsum(equation = var_5440_equation_0, values = (var_5330_cast_fp16_14, var_5405_cast_fp16))[name = tensor<string, []>("op_5440_cast_fp16")];
+            tensor<string, []> var_5442_equation_0 = const()[name = tensor<string, []>("op_5442_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5442_cast_fp16 = einsum(equation = var_5442_equation_0, values = (var_5330_cast_fp16_15, var_5406_cast_fp16))[name = tensor<string, []>("op_5442_cast_fp16")];
+            tensor<string, []> var_5444_equation_0 = const()[name = tensor<string, []>("op_5444_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5444_cast_fp16 = einsum(equation = var_5444_equation_0, values = (var_5330_cast_fp16_16, var_5407_cast_fp16))[name = tensor<string, []>("op_5444_cast_fp16")];
+            tensor<string, []> var_5446_equation_0 = const()[name = tensor<string, []>("op_5446_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5446_cast_fp16 = einsum(equation = var_5446_equation_0, values = (var_5330_cast_fp16_17, var_5408_cast_fp16))[name = tensor<string, []>("op_5446_cast_fp16")];
+            tensor<string, []> var_5448_equation_0 = const()[name = tensor<string, []>("op_5448_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5448_cast_fp16 = einsum(equation = var_5448_equation_0, values = (var_5330_cast_fp16_18, var_5409_cast_fp16))[name = tensor<string, []>("op_5448_cast_fp16")];
+            tensor<string, []> var_5450_equation_0 = const()[name = tensor<string, []>("op_5450_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5450_cast_fp16 = einsum(equation = var_5450_equation_0, values = (var_5330_cast_fp16_19, var_5410_cast_fp16))[name = tensor<string, []>("op_5450_cast_fp16")];
+            tensor<bool, []> input_195_interleave_0 = const()[name = tensor<string, []>("input_195_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_195_cast_fp16 = concat(axis = var_5235, interleave = input_195_interleave_0, values = (var_5412_cast_fp16, var_5414_cast_fp16, var_5416_cast_fp16, var_5418_cast_fp16, var_5420_cast_fp16, var_5422_cast_fp16, var_5424_cast_fp16, var_5426_cast_fp16, var_5428_cast_fp16, var_5430_cast_fp16, var_5432_cast_fp16, var_5434_cast_fp16, var_5436_cast_fp16, var_5438_cast_fp16, var_5440_cast_fp16, var_5442_cast_fp16, var_5444_cast_fp16, var_5446_cast_fp16, var_5448_cast_fp16, var_5450_cast_fp16))[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<string, []> var_5459_pad_type_0 = const()[name = tensor<string, []>("op_5459_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5459_strides_0 = const()[name = tensor<string, []>("op_5459_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5459_pad_0 = const()[name = tensor<string, []>("op_5459_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5459_dilations_0 = const()[name = tensor<string, []>("op_5459_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5459_groups_0 = const()[name = tensor<string, []>("op_5459_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(771843712)))];
+            tensor<fp16, [1280]> blocks_19_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775120576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5459_cast_fp16 = conv(bias = blocks_19_attn_out_bias_to_fp16, dilations = var_5459_dilations_0, groups = var_5459_groups_0, pad = var_5459_pad_0, pad_type = var_5459_pad_type_0, strides = var_5459_strides_0, weight = blocks_19_attn_out_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("op_5459_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = var_5459_cast_fp16)[name = tensor<string, []>("inputs_79_cast_fp16")];
+            tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_197_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_197_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775123200)))];
+            tensor<fp16, [1280]> input_197_beta_0_to_fp16 = const()[name = tensor<string, []>("input_197_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775125824)))];
+            tensor<fp16, []> var_5469_to_fp16 = const()[name = tensor<string, []>("op_5469_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = input_197_beta_0_to_fp16, epsilon = var_5469_to_fp16, gamma = input_197_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_19_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775128448)))];
+            tensor<fp16, [5120]> blocks_19_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788235712)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_199_cast_fp16 = conv(bias = blocks_19_mlp_0_bias_to_fp16, dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = blocks_19_mlp_0_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<string, []> var_5495_pad_type_0 = const()[name = tensor<string, []>("op_5495_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5495_strides_0 = const()[name = tensor<string, []>("op_5495_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5495_pad_0 = const()[name = tensor<string, []>("op_5495_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5495_dilations_0 = const()[name = tensor<string, []>("op_5495_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5495_groups_0 = const()[name = tensor<string, []>("op_5495_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_19_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788246016)))];
+            tensor<fp16, [1280]> blocks_19_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801353280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5495_cast_fp16 = conv(bias = blocks_19_mlp_2_bias_to_fp16, dilations = var_5495_dilations_0, groups = var_5495_groups_0, pad = var_5495_pad_0, pad_type = var_5495_pad_type_0, strides = var_5495_strides_0, weight = blocks_19_mlp_2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("op_5495_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_5495_cast_fp16)[name = tensor<string, []>("inputs_81_cast_fp16")];
+            tensor<int32, []> var_5504 = const()[name = tensor<string, []>("op_5504"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_203_axes_0 = const()[name = tensor<string, []>("input_203_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_203_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801355904)))];
+            tensor<fp16, [1280]> input_203_beta_0_to_fp16 = const()[name = tensor<string, []>("input_203_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801358528)))];
+            tensor<fp16, []> var_5520_to_fp16 = const()[name = tensor<string, []>("op_5520_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_203_cast_fp16 = layer_norm(axes = input_203_axes_0, beta = input_203_beta_0_to_fp16, epsilon = var_5520_to_fp16, gamma = input_203_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<string, []> q_41_pad_type_0 = const()[name = tensor<string, []>("q_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_41_strides_0 = const()[name = tensor<string, []>("q_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_41_pad_0 = const()[name = tensor<string, []>("q_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_41_dilations_0 = const()[name = tensor<string, []>("q_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_41_groups_0 = const()[name = tensor<string, []>("q_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5555_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5555_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801361152)))];
+            tensor<fp16, [1280]> var_5555_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5555_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(804638016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5555_cast_fp16 = conv(bias = var_5555_bias_0_to_fp16, dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = var_5555_weight_0_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5555_cast_fp16")];
+            tensor<string, []> k_41_pad_type_0 = const()[name = tensor<string, []>("k_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_41_strides_0 = const()[name = tensor<string, []>("k_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_41_pad_0 = const()[name = tensor<string, []>("k_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_41_dilations_0 = const()[name = tensor<string, []>("k_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_41_groups_0 = const()[name = tensor<string, []>("k_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(804640640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_41_cast_fp16 = conv(dilations = k_41_dilations_0, groups = k_41_groups_0, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = k_41_strides_0, weight = blocks_20_attn_key_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
+            tensor<string, []> var_5553_pad_type_0 = const()[name = tensor<string, []>("op_5553_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5553_strides_0 = const()[name = tensor<string, []>("op_5553_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5553_pad_0 = const()[name = tensor<string, []>("op_5553_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5553_dilations_0 = const()[name = tensor<string, []>("op_5553_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5553_groups_0 = const()[name = tensor<string, []>("op_5553_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(807917504)))];
+            tensor<fp16, [1280]> blocks_20_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811194368)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5553_cast_fp16 = conv(bias = blocks_20_attn_value_bias_to_fp16, dilations = var_5553_dilations_0, groups = var_5553_groups_0, pad = var_5553_pad_0, pad_type = var_5553_pad_type_0, strides = var_5553_strides_0, weight = blocks_20_attn_value_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5553_cast_fp16")];
+            tensor<int32, [20]> tile_60 = const()[name = tensor<string, []>("tile_60"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5556_axis_0 = const()[name = tensor<string, []>("op_5556_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_19 = split(axis = var_5556_axis_0, split_sizes = tile_60, x = var_5555_cast_fp16)[name = tensor<string, []>("op_5556_cast_fp16")];
+            tensor<int32, [4]> var_5577_perm_0 = const()[name = tensor<string, []>("op_5577_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_61 = const()[name = tensor<string, []>("tile_61"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5578_axis_0 = const()[name = tensor<string, []>("op_5578_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5577_cast_fp16 = transpose(perm = var_5577_perm_0, x = k_41_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_19 = split(axis = var_5578_axis_0, split_sizes = tile_61, x = var_5577_cast_fp16)[name = tensor<string, []>("op_5578_cast_fp16")];
+            tensor<int32, [20]> tile_62 = const()[name = tensor<string, []>("tile_62"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5599_axis_0 = const()[name = tensor<string, []>("op_5599_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_19 = split(axis = var_5599_axis_0, split_sizes = tile_62, x = var_5553_cast_fp16)[name = tensor<string, []>("op_5599_cast_fp16")];
+            tensor<string, []> aw_801_equation_0 = const()[name = tensor<string, []>("aw_801_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_801_cast_fp16 = einsum(equation = aw_801_equation_0, values = (var_5578_cast_fp16_0, var_5556_cast_fp16_0))[name = tensor<string, []>("aw_801_cast_fp16")];
+            tensor<string, []> aw_803_equation_0 = const()[name = tensor<string, []>("aw_803_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_803_cast_fp16 = einsum(equation = aw_803_equation_0, values = (var_5578_cast_fp16_1, var_5556_cast_fp16_1))[name = tensor<string, []>("aw_803_cast_fp16")];
+            tensor<string, []> aw_805_equation_0 = const()[name = tensor<string, []>("aw_805_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_805_cast_fp16 = einsum(equation = aw_805_equation_0, values = (var_5578_cast_fp16_2, var_5556_cast_fp16_2))[name = tensor<string, []>("aw_805_cast_fp16")];
+            tensor<string, []> aw_807_equation_0 = const()[name = tensor<string, []>("aw_807_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_807_cast_fp16 = einsum(equation = aw_807_equation_0, values = (var_5578_cast_fp16_3, var_5556_cast_fp16_3))[name = tensor<string, []>("aw_807_cast_fp16")];
+            tensor<string, []> aw_809_equation_0 = const()[name = tensor<string, []>("aw_809_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_809_cast_fp16 = einsum(equation = aw_809_equation_0, values = (var_5578_cast_fp16_4, var_5556_cast_fp16_4))[name = tensor<string, []>("aw_809_cast_fp16")];
+            tensor<string, []> aw_811_equation_0 = const()[name = tensor<string, []>("aw_811_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_811_cast_fp16 = einsum(equation = aw_811_equation_0, values = (var_5578_cast_fp16_5, var_5556_cast_fp16_5))[name = tensor<string, []>("aw_811_cast_fp16")];
+            tensor<string, []> aw_813_equation_0 = const()[name = tensor<string, []>("aw_813_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_813_cast_fp16 = einsum(equation = aw_813_equation_0, values = (var_5578_cast_fp16_6, var_5556_cast_fp16_6))[name = tensor<string, []>("aw_813_cast_fp16")];
+            tensor<string, []> aw_815_equation_0 = const()[name = tensor<string, []>("aw_815_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_815_cast_fp16 = einsum(equation = aw_815_equation_0, values = (var_5578_cast_fp16_7, var_5556_cast_fp16_7))[name = tensor<string, []>("aw_815_cast_fp16")];
+            tensor<string, []> aw_817_equation_0 = const()[name = tensor<string, []>("aw_817_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_817_cast_fp16 = einsum(equation = aw_817_equation_0, values = (var_5578_cast_fp16_8, var_5556_cast_fp16_8))[name = tensor<string, []>("aw_817_cast_fp16")];
+            tensor<string, []> aw_819_equation_0 = const()[name = tensor<string, []>("aw_819_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_819_cast_fp16 = einsum(equation = aw_819_equation_0, values = (var_5578_cast_fp16_9, var_5556_cast_fp16_9))[name = tensor<string, []>("aw_819_cast_fp16")];
+            tensor<string, []> aw_821_equation_0 = const()[name = tensor<string, []>("aw_821_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_821_cast_fp16 = einsum(equation = aw_821_equation_0, values = (var_5578_cast_fp16_10, var_5556_cast_fp16_10))[name = tensor<string, []>("aw_821_cast_fp16")];
+            tensor<string, []> aw_823_equation_0 = const()[name = tensor<string, []>("aw_823_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_823_cast_fp16 = einsum(equation = aw_823_equation_0, values = (var_5578_cast_fp16_11, var_5556_cast_fp16_11))[name = tensor<string, []>("aw_823_cast_fp16")];
+            tensor<string, []> aw_825_equation_0 = const()[name = tensor<string, []>("aw_825_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_825_cast_fp16 = einsum(equation = aw_825_equation_0, values = (var_5578_cast_fp16_12, var_5556_cast_fp16_12))[name = tensor<string, []>("aw_825_cast_fp16")];
+            tensor<string, []> aw_827_equation_0 = const()[name = tensor<string, []>("aw_827_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_827_cast_fp16 = einsum(equation = aw_827_equation_0, values = (var_5578_cast_fp16_13, var_5556_cast_fp16_13))[name = tensor<string, []>("aw_827_cast_fp16")];
+            tensor<string, []> aw_829_equation_0 = const()[name = tensor<string, []>("aw_829_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_829_cast_fp16 = einsum(equation = aw_829_equation_0, values = (var_5578_cast_fp16_14, var_5556_cast_fp16_14))[name = tensor<string, []>("aw_829_cast_fp16")];
+            tensor<string, []> aw_831_equation_0 = const()[name = tensor<string, []>("aw_831_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_831_cast_fp16 = einsum(equation = aw_831_equation_0, values = (var_5578_cast_fp16_15, var_5556_cast_fp16_15))[name = tensor<string, []>("aw_831_cast_fp16")];
+            tensor<string, []> aw_833_equation_0 = const()[name = tensor<string, []>("aw_833_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_833_cast_fp16 = einsum(equation = aw_833_equation_0, values = (var_5578_cast_fp16_16, var_5556_cast_fp16_16))[name = tensor<string, []>("aw_833_cast_fp16")];
+            tensor<string, []> aw_835_equation_0 = const()[name = tensor<string, []>("aw_835_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_835_cast_fp16 = einsum(equation = aw_835_equation_0, values = (var_5578_cast_fp16_17, var_5556_cast_fp16_17))[name = tensor<string, []>("aw_835_cast_fp16")];
+            tensor<string, []> aw_837_equation_0 = const()[name = tensor<string, []>("aw_837_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_837_cast_fp16 = einsum(equation = aw_837_equation_0, values = (var_5578_cast_fp16_18, var_5556_cast_fp16_18))[name = tensor<string, []>("aw_837_cast_fp16")];
+            tensor<string, []> aw_839_equation_0 = const()[name = tensor<string, []>("aw_839_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_839_cast_fp16 = einsum(equation = aw_839_equation_0, values = (var_5578_cast_fp16_19, var_5556_cast_fp16_19))[name = tensor<string, []>("aw_839_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5660_cast_fp16 = softmax(axis = var_5504, x = aw_801_cast_fp16)[name = tensor<string, []>("op_5660_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5661_cast_fp16 = softmax(axis = var_5504, x = aw_803_cast_fp16)[name = tensor<string, []>("op_5661_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5662_cast_fp16 = softmax(axis = var_5504, x = aw_805_cast_fp16)[name = tensor<string, []>("op_5662_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5663_cast_fp16 = softmax(axis = var_5504, x = aw_807_cast_fp16)[name = tensor<string, []>("op_5663_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5664_cast_fp16 = softmax(axis = var_5504, x = aw_809_cast_fp16)[name = tensor<string, []>("op_5664_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5665_cast_fp16 = softmax(axis = var_5504, x = aw_811_cast_fp16)[name = tensor<string, []>("op_5665_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5666_cast_fp16 = softmax(axis = var_5504, x = aw_813_cast_fp16)[name = tensor<string, []>("op_5666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5667_cast_fp16 = softmax(axis = var_5504, x = aw_815_cast_fp16)[name = tensor<string, []>("op_5667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5668_cast_fp16 = softmax(axis = var_5504, x = aw_817_cast_fp16)[name = tensor<string, []>("op_5668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5669_cast_fp16 = softmax(axis = var_5504, x = aw_819_cast_fp16)[name = tensor<string, []>("op_5669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5670_cast_fp16 = softmax(axis = var_5504, x = aw_821_cast_fp16)[name = tensor<string, []>("op_5670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5671_cast_fp16 = softmax(axis = var_5504, x = aw_823_cast_fp16)[name = tensor<string, []>("op_5671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5672_cast_fp16 = softmax(axis = var_5504, x = aw_825_cast_fp16)[name = tensor<string, []>("op_5672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5673_cast_fp16 = softmax(axis = var_5504, x = aw_827_cast_fp16)[name = tensor<string, []>("op_5673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5674_cast_fp16 = softmax(axis = var_5504, x = aw_829_cast_fp16)[name = tensor<string, []>("op_5674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5675_cast_fp16 = softmax(axis = var_5504, x = aw_831_cast_fp16)[name = tensor<string, []>("op_5675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5676_cast_fp16 = softmax(axis = var_5504, x = aw_833_cast_fp16)[name = tensor<string, []>("op_5676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5677_cast_fp16 = softmax(axis = var_5504, x = aw_835_cast_fp16)[name = tensor<string, []>("op_5677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5678_cast_fp16 = softmax(axis = var_5504, x = aw_837_cast_fp16)[name = tensor<string, []>("op_5678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5679_cast_fp16 = softmax(axis = var_5504, x = aw_839_cast_fp16)[name = tensor<string, []>("op_5679_cast_fp16")];
+            tensor<string, []> var_5681_equation_0 = const()[name = tensor<string, []>("op_5681_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5681_cast_fp16 = einsum(equation = var_5681_equation_0, values = (var_5599_cast_fp16_0, var_5660_cast_fp16))[name = tensor<string, []>("op_5681_cast_fp16")];
+            tensor<string, []> var_5683_equation_0 = const()[name = tensor<string, []>("op_5683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5683_cast_fp16 = einsum(equation = var_5683_equation_0, values = (var_5599_cast_fp16_1, var_5661_cast_fp16))[name = tensor<string, []>("op_5683_cast_fp16")];
+            tensor<string, []> var_5685_equation_0 = const()[name = tensor<string, []>("op_5685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5685_cast_fp16 = einsum(equation = var_5685_equation_0, values = (var_5599_cast_fp16_2, var_5662_cast_fp16))[name = tensor<string, []>("op_5685_cast_fp16")];
+            tensor<string, []> var_5687_equation_0 = const()[name = tensor<string, []>("op_5687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5687_cast_fp16 = einsum(equation = var_5687_equation_0, values = (var_5599_cast_fp16_3, var_5663_cast_fp16))[name = tensor<string, []>("op_5687_cast_fp16")];
+            tensor<string, []> var_5689_equation_0 = const()[name = tensor<string, []>("op_5689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5689_cast_fp16 = einsum(equation = var_5689_equation_0, values = (var_5599_cast_fp16_4, var_5664_cast_fp16))[name = tensor<string, []>("op_5689_cast_fp16")];
+            tensor<string, []> var_5691_equation_0 = const()[name = tensor<string, []>("op_5691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5691_cast_fp16 = einsum(equation = var_5691_equation_0, values = (var_5599_cast_fp16_5, var_5665_cast_fp16))[name = tensor<string, []>("op_5691_cast_fp16")];
+            tensor<string, []> var_5693_equation_0 = const()[name = tensor<string, []>("op_5693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5693_cast_fp16 = einsum(equation = var_5693_equation_0, values = (var_5599_cast_fp16_6, var_5666_cast_fp16))[name = tensor<string, []>("op_5693_cast_fp16")];
+            tensor<string, []> var_5695_equation_0 = const()[name = tensor<string, []>("op_5695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5695_cast_fp16 = einsum(equation = var_5695_equation_0, values = (var_5599_cast_fp16_7, var_5667_cast_fp16))[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<string, []> var_5697_equation_0 = const()[name = tensor<string, []>("op_5697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5697_cast_fp16 = einsum(equation = var_5697_equation_0, values = (var_5599_cast_fp16_8, var_5668_cast_fp16))[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<string, []> var_5699_equation_0 = const()[name = tensor<string, []>("op_5699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5699_cast_fp16 = einsum(equation = var_5699_equation_0, values = (var_5599_cast_fp16_9, var_5669_cast_fp16))[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<string, []> var_5701_equation_0 = const()[name = tensor<string, []>("op_5701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5701_cast_fp16 = einsum(equation = var_5701_equation_0, values = (var_5599_cast_fp16_10, var_5670_cast_fp16))[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<string, []> var_5703_equation_0 = const()[name = tensor<string, []>("op_5703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5703_cast_fp16 = einsum(equation = var_5703_equation_0, values = (var_5599_cast_fp16_11, var_5671_cast_fp16))[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<string, []> var_5705_equation_0 = const()[name = tensor<string, []>("op_5705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5705_cast_fp16 = einsum(equation = var_5705_equation_0, values = (var_5599_cast_fp16_12, var_5672_cast_fp16))[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<string, []> var_5707_equation_0 = const()[name = tensor<string, []>("op_5707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5707_cast_fp16 = einsum(equation = var_5707_equation_0, values = (var_5599_cast_fp16_13, var_5673_cast_fp16))[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<string, []> var_5709_equation_0 = const()[name = tensor<string, []>("op_5709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5709_cast_fp16 = einsum(equation = var_5709_equation_0, values = (var_5599_cast_fp16_14, var_5674_cast_fp16))[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<string, []> var_5711_equation_0 = const()[name = tensor<string, []>("op_5711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5711_cast_fp16 = einsum(equation = var_5711_equation_0, values = (var_5599_cast_fp16_15, var_5675_cast_fp16))[name = tensor<string, []>("op_5711_cast_fp16")];
+            tensor<string, []> var_5713_equation_0 = const()[name = tensor<string, []>("op_5713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5713_cast_fp16 = einsum(equation = var_5713_equation_0, values = (var_5599_cast_fp16_16, var_5676_cast_fp16))[name = tensor<string, []>("op_5713_cast_fp16")];
+            tensor<string, []> var_5715_equation_0 = const()[name = tensor<string, []>("op_5715_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5715_cast_fp16 = einsum(equation = var_5715_equation_0, values = (var_5599_cast_fp16_17, var_5677_cast_fp16))[name = tensor<string, []>("op_5715_cast_fp16")];
+            tensor<string, []> var_5717_equation_0 = const()[name = tensor<string, []>("op_5717_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5717_cast_fp16 = einsum(equation = var_5717_equation_0, values = (var_5599_cast_fp16_18, var_5678_cast_fp16))[name = tensor<string, []>("op_5717_cast_fp16")];
+            tensor<string, []> var_5719_equation_0 = const()[name = tensor<string, []>("op_5719_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5719_cast_fp16 = einsum(equation = var_5719_equation_0, values = (var_5599_cast_fp16_19, var_5679_cast_fp16))[name = tensor<string, []>("op_5719_cast_fp16")];
+            tensor<bool, []> input_205_interleave_0 = const()[name = tensor<string, []>("input_205_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_205_cast_fp16 = concat(axis = var_5504, interleave = input_205_interleave_0, values = (var_5681_cast_fp16, var_5683_cast_fp16, var_5685_cast_fp16, var_5687_cast_fp16, var_5689_cast_fp16, var_5691_cast_fp16, var_5693_cast_fp16, var_5695_cast_fp16, var_5697_cast_fp16, var_5699_cast_fp16, var_5701_cast_fp16, var_5703_cast_fp16, var_5705_cast_fp16, var_5707_cast_fp16, var_5709_cast_fp16, var_5711_cast_fp16, var_5713_cast_fp16, var_5715_cast_fp16, var_5717_cast_fp16, var_5719_cast_fp16))[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<string, []> var_5728_pad_type_0 = const()[name = tensor<string, []>("op_5728_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5728_strides_0 = const()[name = tensor<string, []>("op_5728_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5728_pad_0 = const()[name = tensor<string, []>("op_5728_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5728_dilations_0 = const()[name = tensor<string, []>("op_5728_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5728_groups_0 = const()[name = tensor<string, []>("op_5728_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811196992)))];
+            tensor<fp16, [1280]> blocks_20_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814473856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5728_cast_fp16 = conv(bias = blocks_20_attn_out_bias_to_fp16, dilations = var_5728_dilations_0, groups = var_5728_groups_0, pad = var_5728_pad_0, pad_type = var_5728_pad_type_0, strides = var_5728_strides_0, weight = blocks_20_attn_out_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = var_5728_cast_fp16)[name = tensor<string, []>("inputs_83_cast_fp16")];
+            tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_207_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_207_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814476480)))];
+            tensor<fp16, [1280]> input_207_beta_0_to_fp16 = const()[name = tensor<string, []>("input_207_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814479104)))];
+            tensor<fp16, []> var_5738_to_fp16 = const()[name = tensor<string, []>("op_5738_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_207_cast_fp16 = layer_norm(axes = input_207_axes_0, beta = input_207_beta_0_to_fp16, epsilon = var_5738_to_fp16, gamma = input_207_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<string, []> input_209_pad_type_0 = const()[name = tensor<string, []>("input_209_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_209_strides_0 = const()[name = tensor<string, []>("input_209_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_209_dilations_0 = const()[name = tensor<string, []>("input_209_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_209_groups_0 = const()[name = tensor<string, []>("input_209_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_20_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814481728)))];
+            tensor<fp16, [5120]> blocks_20_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827588992)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_209_cast_fp16 = conv(bias = blocks_20_mlp_0_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = blocks_20_mlp_0_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<string, []> input_211_mode_0 = const()[name = tensor<string, []>("input_211_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_211_cast_fp16 = gelu(mode = input_211_mode_0, x = input_209_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<string, []> var_5764_pad_type_0 = const()[name = tensor<string, []>("op_5764_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5764_strides_0 = const()[name = tensor<string, []>("op_5764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5764_pad_0 = const()[name = tensor<string, []>("op_5764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5764_dilations_0 = const()[name = tensor<string, []>("op_5764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5764_groups_0 = const()[name = tensor<string, []>("op_5764_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_20_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827599296)))];
+            tensor<fp16, [1280]> blocks_20_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840706560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5764_cast_fp16 = conv(bias = blocks_20_mlp_2_bias_to_fp16, dilations = var_5764_dilations_0, groups = var_5764_groups_0, pad = var_5764_pad_0, pad_type = var_5764_pad_type_0, strides = var_5764_strides_0, weight = blocks_20_mlp_2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor<string, []>("op_5764_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = var_5764_cast_fp16)[name = tensor<string, []>("inputs_85_cast_fp16")];
+            tensor<int32, []> var_5773 = const()[name = tensor<string, []>("op_5773"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_213_axes_0 = const()[name = tensor<string, []>("input_213_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_213_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_213_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840709184)))];
+            tensor<fp16, [1280]> input_213_beta_0_to_fp16 = const()[name = tensor<string, []>("input_213_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840711808)))];
+            tensor<fp16, []> var_5789_to_fp16 = const()[name = tensor<string, []>("op_5789_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_213_cast_fp16 = layer_norm(axes = input_213_axes_0, beta = input_213_beta_0_to_fp16, epsilon = var_5789_to_fp16, gamma = input_213_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<string, []> q_43_pad_type_0 = const()[name = tensor<string, []>("q_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_43_strides_0 = const()[name = tensor<string, []>("q_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_43_pad_0 = const()[name = tensor<string, []>("q_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_43_dilations_0 = const()[name = tensor<string, []>("q_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_43_groups_0 = const()[name = tensor<string, []>("q_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5824_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5824_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840714432)))];
+            tensor<fp16, [1280]> var_5824_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5824_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(843991296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5824_cast_fp16 = conv(bias = var_5824_bias_0_to_fp16, dilations = q_43_dilations_0, groups = q_43_groups_0, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = q_43_strides_0, weight = var_5824_weight_0_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5824_cast_fp16")];
+            tensor<string, []> k_43_pad_type_0 = const()[name = tensor<string, []>("k_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_43_strides_0 = const()[name = tensor<string, []>("k_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_43_pad_0 = const()[name = tensor<string, []>("k_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_43_dilations_0 = const()[name = tensor<string, []>("k_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_43_groups_0 = const()[name = tensor<string, []>("k_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(843993920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_43_cast_fp16 = conv(dilations = k_43_dilations_0, groups = k_43_groups_0, pad = k_43_pad_0, pad_type = k_43_pad_type_0, strides = k_43_strides_0, weight = blocks_21_attn_key_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("k_43_cast_fp16")];
+            tensor<string, []> var_5822_pad_type_0 = const()[name = tensor<string, []>("op_5822_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5822_strides_0 = const()[name = tensor<string, []>("op_5822_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5822_pad_0 = const()[name = tensor<string, []>("op_5822_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5822_dilations_0 = const()[name = tensor<string, []>("op_5822_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5822_groups_0 = const()[name = tensor<string, []>("op_5822_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(847270784)))];
+            tensor<fp16, [1280]> blocks_21_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850547648)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5822_cast_fp16 = conv(bias = blocks_21_attn_value_bias_to_fp16, dilations = var_5822_dilations_0, groups = var_5822_groups_0, pad = var_5822_pad_0, pad_type = var_5822_pad_type_0, strides = var_5822_strides_0, weight = blocks_21_attn_value_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5822_cast_fp16")];
+            tensor<int32, [20]> tile_63 = const()[name = tensor<string, []>("tile_63"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5825_axis_0 = const()[name = tensor<string, []>("op_5825_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_19 = split(axis = var_5825_axis_0, split_sizes = tile_63, x = var_5824_cast_fp16)[name = tensor<string, []>("op_5825_cast_fp16")];
+            tensor<int32, [4]> var_5846_perm_0 = const()[name = tensor<string, []>("op_5846_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_64 = const()[name = tensor<string, []>("tile_64"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5847_axis_0 = const()[name = tensor<string, []>("op_5847_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5846_cast_fp16 = transpose(perm = var_5846_perm_0, x = k_43_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_19 = split(axis = var_5847_axis_0, split_sizes = tile_64, x = var_5846_cast_fp16)[name = tensor<string, []>("op_5847_cast_fp16")];
+            tensor<int32, [20]> tile_65 = const()[name = tensor<string, []>("tile_65"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5868_axis_0 = const()[name = tensor<string, []>("op_5868_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_19 = split(axis = var_5868_axis_0, split_sizes = tile_65, x = var_5822_cast_fp16)[name = tensor<string, []>("op_5868_cast_fp16")];
+            tensor<string, []> aw_841_equation_0 = const()[name = tensor<string, []>("aw_841_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_841_cast_fp16 = einsum(equation = aw_841_equation_0, values = (var_5847_cast_fp16_0, var_5825_cast_fp16_0))[name = tensor<string, []>("aw_841_cast_fp16")];
+            tensor<string, []> aw_843_equation_0 = const()[name = tensor<string, []>("aw_843_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_843_cast_fp16 = einsum(equation = aw_843_equation_0, values = (var_5847_cast_fp16_1, var_5825_cast_fp16_1))[name = tensor<string, []>("aw_843_cast_fp16")];
+            tensor<string, []> aw_845_equation_0 = const()[name = tensor<string, []>("aw_845_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_845_cast_fp16 = einsum(equation = aw_845_equation_0, values = (var_5847_cast_fp16_2, var_5825_cast_fp16_2))[name = tensor<string, []>("aw_845_cast_fp16")];
+            tensor<string, []> aw_847_equation_0 = const()[name = tensor<string, []>("aw_847_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_847_cast_fp16 = einsum(equation = aw_847_equation_0, values = (var_5847_cast_fp16_3, var_5825_cast_fp16_3))[name = tensor<string, []>("aw_847_cast_fp16")];
+            tensor<string, []> aw_849_equation_0 = const()[name = tensor<string, []>("aw_849_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_849_cast_fp16 = einsum(equation = aw_849_equation_0, values = (var_5847_cast_fp16_4, var_5825_cast_fp16_4))[name = tensor<string, []>("aw_849_cast_fp16")];
+            tensor<string, []> aw_851_equation_0 = const()[name = tensor<string, []>("aw_851_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_851_cast_fp16 = einsum(equation = aw_851_equation_0, values = (var_5847_cast_fp16_5, var_5825_cast_fp16_5))[name = tensor<string, []>("aw_851_cast_fp16")];
+            tensor<string, []> aw_853_equation_0 = const()[name = tensor<string, []>("aw_853_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_853_cast_fp16 = einsum(equation = aw_853_equation_0, values = (var_5847_cast_fp16_6, var_5825_cast_fp16_6))[name = tensor<string, []>("aw_853_cast_fp16")];
+            tensor<string, []> aw_855_equation_0 = const()[name = tensor<string, []>("aw_855_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_855_cast_fp16 = einsum(equation = aw_855_equation_0, values = (var_5847_cast_fp16_7, var_5825_cast_fp16_7))[name = tensor<string, []>("aw_855_cast_fp16")];
+            tensor<string, []> aw_857_equation_0 = const()[name = tensor<string, []>("aw_857_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_857_cast_fp16 = einsum(equation = aw_857_equation_0, values = (var_5847_cast_fp16_8, var_5825_cast_fp16_8))[name = tensor<string, []>("aw_857_cast_fp16")];
+            tensor<string, []> aw_859_equation_0 = const()[name = tensor<string, []>("aw_859_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_859_cast_fp16 = einsum(equation = aw_859_equation_0, values = (var_5847_cast_fp16_9, var_5825_cast_fp16_9))[name = tensor<string, []>("aw_859_cast_fp16")];
+            tensor<string, []> aw_861_equation_0 = const()[name = tensor<string, []>("aw_861_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_861_cast_fp16 = einsum(equation = aw_861_equation_0, values = (var_5847_cast_fp16_10, var_5825_cast_fp16_10))[name = tensor<string, []>("aw_861_cast_fp16")];
+            tensor<string, []> aw_863_equation_0 = const()[name = tensor<string, []>("aw_863_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_863_cast_fp16 = einsum(equation = aw_863_equation_0, values = (var_5847_cast_fp16_11, var_5825_cast_fp16_11))[name = tensor<string, []>("aw_863_cast_fp16")];
+            tensor<string, []> aw_865_equation_0 = const()[name = tensor<string, []>("aw_865_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_865_cast_fp16 = einsum(equation = aw_865_equation_0, values = (var_5847_cast_fp16_12, var_5825_cast_fp16_12))[name = tensor<string, []>("aw_865_cast_fp16")];
+            tensor<string, []> aw_867_equation_0 = const()[name = tensor<string, []>("aw_867_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_867_cast_fp16 = einsum(equation = aw_867_equation_0, values = (var_5847_cast_fp16_13, var_5825_cast_fp16_13))[name = tensor<string, []>("aw_867_cast_fp16")];
+            tensor<string, []> aw_869_equation_0 = const()[name = tensor<string, []>("aw_869_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_869_cast_fp16 = einsum(equation = aw_869_equation_0, values = (var_5847_cast_fp16_14, var_5825_cast_fp16_14))[name = tensor<string, []>("aw_869_cast_fp16")];
+            tensor<string, []> aw_871_equation_0 = const()[name = tensor<string, []>("aw_871_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_871_cast_fp16 = einsum(equation = aw_871_equation_0, values = (var_5847_cast_fp16_15, var_5825_cast_fp16_15))[name = tensor<string, []>("aw_871_cast_fp16")];
+            tensor<string, []> aw_873_equation_0 = const()[name = tensor<string, []>("aw_873_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_873_cast_fp16 = einsum(equation = aw_873_equation_0, values = (var_5847_cast_fp16_16, var_5825_cast_fp16_16))[name = tensor<string, []>("aw_873_cast_fp16")];
+            tensor<string, []> aw_875_equation_0 = const()[name = tensor<string, []>("aw_875_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_875_cast_fp16 = einsum(equation = aw_875_equation_0, values = (var_5847_cast_fp16_17, var_5825_cast_fp16_17))[name = tensor<string, []>("aw_875_cast_fp16")];
+            tensor<string, []> aw_877_equation_0 = const()[name = tensor<string, []>("aw_877_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_877_cast_fp16 = einsum(equation = aw_877_equation_0, values = (var_5847_cast_fp16_18, var_5825_cast_fp16_18))[name = tensor<string, []>("aw_877_cast_fp16")];
+            tensor<string, []> aw_879_equation_0 = const()[name = tensor<string, []>("aw_879_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_879_cast_fp16 = einsum(equation = aw_879_equation_0, values = (var_5847_cast_fp16_19, var_5825_cast_fp16_19))[name = tensor<string, []>("aw_879_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5929_cast_fp16 = softmax(axis = var_5773, x = aw_841_cast_fp16)[name = tensor<string, []>("op_5929_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5930_cast_fp16 = softmax(axis = var_5773, x = aw_843_cast_fp16)[name = tensor<string, []>("op_5930_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5931_cast_fp16 = softmax(axis = var_5773, x = aw_845_cast_fp16)[name = tensor<string, []>("op_5931_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5932_cast_fp16 = softmax(axis = var_5773, x = aw_847_cast_fp16)[name = tensor<string, []>("op_5932_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5933_cast_fp16 = softmax(axis = var_5773, x = aw_849_cast_fp16)[name = tensor<string, []>("op_5933_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5934_cast_fp16 = softmax(axis = var_5773, x = aw_851_cast_fp16)[name = tensor<string, []>("op_5934_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5935_cast_fp16 = softmax(axis = var_5773, x = aw_853_cast_fp16)[name = tensor<string, []>("op_5935_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5936_cast_fp16 = softmax(axis = var_5773, x = aw_855_cast_fp16)[name = tensor<string, []>("op_5936_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5937_cast_fp16 = softmax(axis = var_5773, x = aw_857_cast_fp16)[name = tensor<string, []>("op_5937_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5938_cast_fp16 = softmax(axis = var_5773, x = aw_859_cast_fp16)[name = tensor<string, []>("op_5938_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5939_cast_fp16 = softmax(axis = var_5773, x = aw_861_cast_fp16)[name = tensor<string, []>("op_5939_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5940_cast_fp16 = softmax(axis = var_5773, x = aw_863_cast_fp16)[name = tensor<string, []>("op_5940_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5941_cast_fp16 = softmax(axis = var_5773, x = aw_865_cast_fp16)[name = tensor<string, []>("op_5941_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5942_cast_fp16 = softmax(axis = var_5773, x = aw_867_cast_fp16)[name = tensor<string, []>("op_5942_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5943_cast_fp16 = softmax(axis = var_5773, x = aw_869_cast_fp16)[name = tensor<string, []>("op_5943_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5944_cast_fp16 = softmax(axis = var_5773, x = aw_871_cast_fp16)[name = tensor<string, []>("op_5944_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5945_cast_fp16 = softmax(axis = var_5773, x = aw_873_cast_fp16)[name = tensor<string, []>("op_5945_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5946_cast_fp16 = softmax(axis = var_5773, x = aw_875_cast_fp16)[name = tensor<string, []>("op_5946_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5947_cast_fp16 = softmax(axis = var_5773, x = aw_877_cast_fp16)[name = tensor<string, []>("op_5947_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5948_cast_fp16 = softmax(axis = var_5773, x = aw_879_cast_fp16)[name = tensor<string, []>("op_5948_cast_fp16")];
+            tensor<string, []> var_5950_equation_0 = const()[name = tensor<string, []>("op_5950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5950_cast_fp16 = einsum(equation = var_5950_equation_0, values = (var_5868_cast_fp16_0, var_5929_cast_fp16))[name = tensor<string, []>("op_5950_cast_fp16")];
+            tensor<string, []> var_5952_equation_0 = const()[name = tensor<string, []>("op_5952_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5952_cast_fp16 = einsum(equation = var_5952_equation_0, values = (var_5868_cast_fp16_1, var_5930_cast_fp16))[name = tensor<string, []>("op_5952_cast_fp16")];
+            tensor<string, []> var_5954_equation_0 = const()[name = tensor<string, []>("op_5954_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5954_cast_fp16 = einsum(equation = var_5954_equation_0, values = (var_5868_cast_fp16_2, var_5931_cast_fp16))[name = tensor<string, []>("op_5954_cast_fp16")];
+            tensor<string, []> var_5956_equation_0 = const()[name = tensor<string, []>("op_5956_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5956_cast_fp16 = einsum(equation = var_5956_equation_0, values = (var_5868_cast_fp16_3, var_5932_cast_fp16))[name = tensor<string, []>("op_5956_cast_fp16")];
+            tensor<string, []> var_5958_equation_0 = const()[name = tensor<string, []>("op_5958_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5958_cast_fp16 = einsum(equation = var_5958_equation_0, values = (var_5868_cast_fp16_4, var_5933_cast_fp16))[name = tensor<string, []>("op_5958_cast_fp16")];
+            tensor<string, []> var_5960_equation_0 = const()[name = tensor<string, []>("op_5960_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5960_cast_fp16 = einsum(equation = var_5960_equation_0, values = (var_5868_cast_fp16_5, var_5934_cast_fp16))[name = tensor<string, []>("op_5960_cast_fp16")];
+            tensor<string, []> var_5962_equation_0 = const()[name = tensor<string, []>("op_5962_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5962_cast_fp16 = einsum(equation = var_5962_equation_0, values = (var_5868_cast_fp16_6, var_5935_cast_fp16))[name = tensor<string, []>("op_5962_cast_fp16")];
+            tensor<string, []> var_5964_equation_0 = const()[name = tensor<string, []>("op_5964_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5964_cast_fp16 = einsum(equation = var_5964_equation_0, values = (var_5868_cast_fp16_7, var_5936_cast_fp16))[name = tensor<string, []>("op_5964_cast_fp16")];
+            tensor<string, []> var_5966_equation_0 = const()[name = tensor<string, []>("op_5966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5966_cast_fp16 = einsum(equation = var_5966_equation_0, values = (var_5868_cast_fp16_8, var_5937_cast_fp16))[name = tensor<string, []>("op_5966_cast_fp16")];
+            tensor<string, []> var_5968_equation_0 = const()[name = tensor<string, []>("op_5968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5968_cast_fp16 = einsum(equation = var_5968_equation_0, values = (var_5868_cast_fp16_9, var_5938_cast_fp16))[name = tensor<string, []>("op_5968_cast_fp16")];
+            tensor<string, []> var_5970_equation_0 = const()[name = tensor<string, []>("op_5970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5970_cast_fp16 = einsum(equation = var_5970_equation_0, values = (var_5868_cast_fp16_10, var_5939_cast_fp16))[name = tensor<string, []>("op_5970_cast_fp16")];
+            tensor<string, []> var_5972_equation_0 = const()[name = tensor<string, []>("op_5972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5972_cast_fp16 = einsum(equation = var_5972_equation_0, values = (var_5868_cast_fp16_11, var_5940_cast_fp16))[name = tensor<string, []>("op_5972_cast_fp16")];
+            tensor<string, []> var_5974_equation_0 = const()[name = tensor<string, []>("op_5974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5974_cast_fp16 = einsum(equation = var_5974_equation_0, values = (var_5868_cast_fp16_12, var_5941_cast_fp16))[name = tensor<string, []>("op_5974_cast_fp16")];
+            tensor<string, []> var_5976_equation_0 = const()[name = tensor<string, []>("op_5976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5976_cast_fp16 = einsum(equation = var_5976_equation_0, values = (var_5868_cast_fp16_13, var_5942_cast_fp16))[name = tensor<string, []>("op_5976_cast_fp16")];
+            tensor<string, []> var_5978_equation_0 = const()[name = tensor<string, []>("op_5978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5978_cast_fp16 = einsum(equation = var_5978_equation_0, values = (var_5868_cast_fp16_14, var_5943_cast_fp16))[name = tensor<string, []>("op_5978_cast_fp16")];
+            tensor<string, []> var_5980_equation_0 = const()[name = tensor<string, []>("op_5980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5980_cast_fp16 = einsum(equation = var_5980_equation_0, values = (var_5868_cast_fp16_15, var_5944_cast_fp16))[name = tensor<string, []>("op_5980_cast_fp16")];
+            tensor<string, []> var_5982_equation_0 = const()[name = tensor<string, []>("op_5982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5982_cast_fp16 = einsum(equation = var_5982_equation_0, values = (var_5868_cast_fp16_16, var_5945_cast_fp16))[name = tensor<string, []>("op_5982_cast_fp16")];
+            tensor<string, []> var_5984_equation_0 = const()[name = tensor<string, []>("op_5984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5984_cast_fp16 = einsum(equation = var_5984_equation_0, values = (var_5868_cast_fp16_17, var_5946_cast_fp16))[name = tensor<string, []>("op_5984_cast_fp16")];
+            tensor<string, []> var_5986_equation_0 = const()[name = tensor<string, []>("op_5986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5986_cast_fp16 = einsum(equation = var_5986_equation_0, values = (var_5868_cast_fp16_18, var_5947_cast_fp16))[name = tensor<string, []>("op_5986_cast_fp16")];
+            tensor<string, []> var_5988_equation_0 = const()[name = tensor<string, []>("op_5988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5988_cast_fp16 = einsum(equation = var_5988_equation_0, values = (var_5868_cast_fp16_19, var_5948_cast_fp16))[name = tensor<string, []>("op_5988_cast_fp16")];
+            tensor<bool, []> input_215_interleave_0 = const()[name = tensor<string, []>("input_215_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_215_cast_fp16 = concat(axis = var_5773, interleave = input_215_interleave_0, values = (var_5950_cast_fp16, var_5952_cast_fp16, var_5954_cast_fp16, var_5956_cast_fp16, var_5958_cast_fp16, var_5960_cast_fp16, var_5962_cast_fp16, var_5964_cast_fp16, var_5966_cast_fp16, var_5968_cast_fp16, var_5970_cast_fp16, var_5972_cast_fp16, var_5974_cast_fp16, var_5976_cast_fp16, var_5978_cast_fp16, var_5980_cast_fp16, var_5982_cast_fp16, var_5984_cast_fp16, var_5986_cast_fp16, var_5988_cast_fp16))[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<string, []> var_5997_pad_type_0 = const()[name = tensor<string, []>("op_5997_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5997_strides_0 = const()[name = tensor<string, []>("op_5997_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5997_pad_0 = const()[name = tensor<string, []>("op_5997_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5997_dilations_0 = const()[name = tensor<string, []>("op_5997_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5997_groups_0 = const()[name = tensor<string, []>("op_5997_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850550272)))];
+            tensor<fp16, [1280]> blocks_21_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853827136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5997_cast_fp16 = conv(bias = blocks_21_attn_out_bias_to_fp16, dilations = var_5997_dilations_0, groups = var_5997_groups_0, pad = var_5997_pad_0, pad_type = var_5997_pad_type_0, strides = var_5997_strides_0, weight = blocks_21_attn_out_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("op_5997_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = var_5997_cast_fp16)[name = tensor<string, []>("inputs_87_cast_fp16")];
+            tensor<int32, [1]> input_217_axes_0 = const()[name = tensor<string, []>("input_217_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_217_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_217_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853829760)))];
+            tensor<fp16, [1280]> input_217_beta_0_to_fp16 = const()[name = tensor<string, []>("input_217_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853832384)))];
+            tensor<fp16, []> var_6007_to_fp16 = const()[name = tensor<string, []>("op_6007_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_217_cast_fp16 = layer_norm(axes = input_217_axes_0, beta = input_217_beta_0_to_fp16, epsilon = var_6007_to_fp16, gamma = input_217_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_21_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853835008)))];
+            tensor<fp16, [5120]> blocks_21_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(866942272)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_219_cast_fp16 = conv(bias = blocks_21_mlp_0_bias_to_fp16, dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = blocks_21_mlp_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
+            tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
+            tensor<string, []> var_6033_pad_type_0 = const()[name = tensor<string, []>("op_6033_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6033_strides_0 = const()[name = tensor<string, []>("op_6033_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6033_pad_0 = const()[name = tensor<string, []>("op_6033_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6033_dilations_0 = const()[name = tensor<string, []>("op_6033_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6033_groups_0 = const()[name = tensor<string, []>("op_6033_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_21_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(866952576)))];
+            tensor<fp16, [1280]> blocks_21_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880059840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6033_cast_fp16 = conv(bias = blocks_21_mlp_2_bias_to_fp16, dilations = var_6033_dilations_0, groups = var_6033_groups_0, pad = var_6033_pad_0, pad_type = var_6033_pad_type_0, strides = var_6033_strides_0, weight = blocks_21_mlp_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("op_6033_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_6033_cast_fp16)[name = tensor<string, []>("inputs_89_cast_fp16")];
+            tensor<int32, []> var_6042 = const()[name = tensor<string, []>("op_6042"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_223_axes_0 = const()[name = tensor<string, []>("input_223_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_223_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_223_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880062464)))];
+            tensor<fp16, [1280]> input_223_beta_0_to_fp16 = const()[name = tensor<string, []>("input_223_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880065088)))];
+            tensor<fp16, []> var_6058_to_fp16 = const()[name = tensor<string, []>("op_6058_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = input_223_beta_0_to_fp16, epsilon = var_6058_to_fp16, gamma = input_223_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
+            tensor<string, []> q_45_pad_type_0 = const()[name = tensor<string, []>("q_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_45_strides_0 = const()[name = tensor<string, []>("q_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_45_pad_0 = const()[name = tensor<string, []>("q_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_45_dilations_0 = const()[name = tensor<string, []>("q_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_45_groups_0 = const()[name = tensor<string, []>("q_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6093_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6093_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880067712)))];
+            tensor<fp16, [1280]> var_6093_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6093_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883344576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6093_cast_fp16 = conv(bias = var_6093_bias_0_to_fp16, dilations = q_45_dilations_0, groups = q_45_groups_0, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = q_45_strides_0, weight = var_6093_weight_0_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6093_cast_fp16")];
+            tensor<string, []> k_45_pad_type_0 = const()[name = tensor<string, []>("k_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_45_strides_0 = const()[name = tensor<string, []>("k_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_45_pad_0 = const()[name = tensor<string, []>("k_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_45_dilations_0 = const()[name = tensor<string, []>("k_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_45_groups_0 = const()[name = tensor<string, []>("k_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883347200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_45_cast_fp16 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = blocks_22_attn_key_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
+            tensor<string, []> var_6091_pad_type_0 = const()[name = tensor<string, []>("op_6091_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6091_strides_0 = const()[name = tensor<string, []>("op_6091_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6091_pad_0 = const()[name = tensor<string, []>("op_6091_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6091_dilations_0 = const()[name = tensor<string, []>("op_6091_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6091_groups_0 = const()[name = tensor<string, []>("op_6091_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(886624064)))];
+            tensor<fp16, [1280]> blocks_22_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(889900928)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6091_cast_fp16 = conv(bias = blocks_22_attn_value_bias_to_fp16, dilations = var_6091_dilations_0, groups = var_6091_groups_0, pad = var_6091_pad_0, pad_type = var_6091_pad_type_0, strides = var_6091_strides_0, weight = blocks_22_attn_value_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6091_cast_fp16")];
+            tensor<int32, [20]> tile_66 = const()[name = tensor<string, []>("tile_66"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6094_axis_0 = const()[name = tensor<string, []>("op_6094_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_19 = split(axis = var_6094_axis_0, split_sizes = tile_66, x = var_6093_cast_fp16)[name = tensor<string, []>("op_6094_cast_fp16")];
+            tensor<int32, [4]> var_6115_perm_0 = const()[name = tensor<string, []>("op_6115_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_67 = const()[name = tensor<string, []>("tile_67"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6116_axis_0 = const()[name = tensor<string, []>("op_6116_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6115_cast_fp16 = transpose(perm = var_6115_perm_0, x = k_45_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_19 = split(axis = var_6116_axis_0, split_sizes = tile_67, x = var_6115_cast_fp16)[name = tensor<string, []>("op_6116_cast_fp16")];
+            tensor<int32, [20]> tile_68 = const()[name = tensor<string, []>("tile_68"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6137_axis_0 = const()[name = tensor<string, []>("op_6137_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_19 = split(axis = var_6137_axis_0, split_sizes = tile_68, x = var_6091_cast_fp16)[name = tensor<string, []>("op_6137_cast_fp16")];
+            tensor<string, []> aw_881_equation_0 = const()[name = tensor<string, []>("aw_881_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_881_cast_fp16 = einsum(equation = aw_881_equation_0, values = (var_6116_cast_fp16_0, var_6094_cast_fp16_0))[name = tensor<string, []>("aw_881_cast_fp16")];
+            tensor<string, []> aw_883_equation_0 = const()[name = tensor<string, []>("aw_883_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_883_cast_fp16 = einsum(equation = aw_883_equation_0, values = (var_6116_cast_fp16_1, var_6094_cast_fp16_1))[name = tensor<string, []>("aw_883_cast_fp16")];
+            tensor<string, []> aw_885_equation_0 = const()[name = tensor<string, []>("aw_885_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_885_cast_fp16 = einsum(equation = aw_885_equation_0, values = (var_6116_cast_fp16_2, var_6094_cast_fp16_2))[name = tensor<string, []>("aw_885_cast_fp16")];
+            tensor<string, []> aw_887_equation_0 = const()[name = tensor<string, []>("aw_887_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_887_cast_fp16 = einsum(equation = aw_887_equation_0, values = (var_6116_cast_fp16_3, var_6094_cast_fp16_3))[name = tensor<string, []>("aw_887_cast_fp16")];
+            tensor<string, []> aw_889_equation_0 = const()[name = tensor<string, []>("aw_889_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_889_cast_fp16 = einsum(equation = aw_889_equation_0, values = (var_6116_cast_fp16_4, var_6094_cast_fp16_4))[name = tensor<string, []>("aw_889_cast_fp16")];
+            tensor<string, []> aw_891_equation_0 = const()[name = tensor<string, []>("aw_891_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_891_cast_fp16 = einsum(equation = aw_891_equation_0, values = (var_6116_cast_fp16_5, var_6094_cast_fp16_5))[name = tensor<string, []>("aw_891_cast_fp16")];
+            tensor<string, []> aw_893_equation_0 = const()[name = tensor<string, []>("aw_893_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_893_cast_fp16 = einsum(equation = aw_893_equation_0, values = (var_6116_cast_fp16_6, var_6094_cast_fp16_6))[name = tensor<string, []>("aw_893_cast_fp16")];
+            tensor<string, []> aw_895_equation_0 = const()[name = tensor<string, []>("aw_895_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_895_cast_fp16 = einsum(equation = aw_895_equation_0, values = (var_6116_cast_fp16_7, var_6094_cast_fp16_7))[name = tensor<string, []>("aw_895_cast_fp16")];
+            tensor<string, []> aw_897_equation_0 = const()[name = tensor<string, []>("aw_897_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_897_cast_fp16 = einsum(equation = aw_897_equation_0, values = (var_6116_cast_fp16_8, var_6094_cast_fp16_8))[name = tensor<string, []>("aw_897_cast_fp16")];
+            tensor<string, []> aw_899_equation_0 = const()[name = tensor<string, []>("aw_899_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_899_cast_fp16 = einsum(equation = aw_899_equation_0, values = (var_6116_cast_fp16_9, var_6094_cast_fp16_9))[name = tensor<string, []>("aw_899_cast_fp16")];
+            tensor<string, []> aw_901_equation_0 = const()[name = tensor<string, []>("aw_901_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_901_cast_fp16 = einsum(equation = aw_901_equation_0, values = (var_6116_cast_fp16_10, var_6094_cast_fp16_10))[name = tensor<string, []>("aw_901_cast_fp16")];
+            tensor<string, []> aw_903_equation_0 = const()[name = tensor<string, []>("aw_903_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_903_cast_fp16 = einsum(equation = aw_903_equation_0, values = (var_6116_cast_fp16_11, var_6094_cast_fp16_11))[name = tensor<string, []>("aw_903_cast_fp16")];
+            tensor<string, []> aw_905_equation_0 = const()[name = tensor<string, []>("aw_905_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_905_cast_fp16 = einsum(equation = aw_905_equation_0, values = (var_6116_cast_fp16_12, var_6094_cast_fp16_12))[name = tensor<string, []>("aw_905_cast_fp16")];
+            tensor<string, []> aw_907_equation_0 = const()[name = tensor<string, []>("aw_907_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_907_cast_fp16 = einsum(equation = aw_907_equation_0, values = (var_6116_cast_fp16_13, var_6094_cast_fp16_13))[name = tensor<string, []>("aw_907_cast_fp16")];
+            tensor<string, []> aw_909_equation_0 = const()[name = tensor<string, []>("aw_909_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_909_cast_fp16 = einsum(equation = aw_909_equation_0, values = (var_6116_cast_fp16_14, var_6094_cast_fp16_14))[name = tensor<string, []>("aw_909_cast_fp16")];
+            tensor<string, []> aw_911_equation_0 = const()[name = tensor<string, []>("aw_911_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_911_cast_fp16 = einsum(equation = aw_911_equation_0, values = (var_6116_cast_fp16_15, var_6094_cast_fp16_15))[name = tensor<string, []>("aw_911_cast_fp16")];
+            tensor<string, []> aw_913_equation_0 = const()[name = tensor<string, []>("aw_913_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_913_cast_fp16 = einsum(equation = aw_913_equation_0, values = (var_6116_cast_fp16_16, var_6094_cast_fp16_16))[name = tensor<string, []>("aw_913_cast_fp16")];
+            tensor<string, []> aw_915_equation_0 = const()[name = tensor<string, []>("aw_915_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_915_cast_fp16 = einsum(equation = aw_915_equation_0, values = (var_6116_cast_fp16_17, var_6094_cast_fp16_17))[name = tensor<string, []>("aw_915_cast_fp16")];
+            tensor<string, []> aw_917_equation_0 = const()[name = tensor<string, []>("aw_917_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_917_cast_fp16 = einsum(equation = aw_917_equation_0, values = (var_6116_cast_fp16_18, var_6094_cast_fp16_18))[name = tensor<string, []>("aw_917_cast_fp16")];
+            tensor<string, []> aw_919_equation_0 = const()[name = tensor<string, []>("aw_919_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_919_cast_fp16 = einsum(equation = aw_919_equation_0, values = (var_6116_cast_fp16_19, var_6094_cast_fp16_19))[name = tensor<string, []>("aw_919_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6198_cast_fp16 = softmax(axis = var_6042, x = aw_881_cast_fp16)[name = tensor<string, []>("op_6198_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6199_cast_fp16 = softmax(axis = var_6042, x = aw_883_cast_fp16)[name = tensor<string, []>("op_6199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6200_cast_fp16 = softmax(axis = var_6042, x = aw_885_cast_fp16)[name = tensor<string, []>("op_6200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6201_cast_fp16 = softmax(axis = var_6042, x = aw_887_cast_fp16)[name = tensor<string, []>("op_6201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6202_cast_fp16 = softmax(axis = var_6042, x = aw_889_cast_fp16)[name = tensor<string, []>("op_6202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6203_cast_fp16 = softmax(axis = var_6042, x = aw_891_cast_fp16)[name = tensor<string, []>("op_6203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6204_cast_fp16 = softmax(axis = var_6042, x = aw_893_cast_fp16)[name = tensor<string, []>("op_6204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6205_cast_fp16 = softmax(axis = var_6042, x = aw_895_cast_fp16)[name = tensor<string, []>("op_6205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6206_cast_fp16 = softmax(axis = var_6042, x = aw_897_cast_fp16)[name = tensor<string, []>("op_6206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6207_cast_fp16 = softmax(axis = var_6042, x = aw_899_cast_fp16)[name = tensor<string, []>("op_6207_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6208_cast_fp16 = softmax(axis = var_6042, x = aw_901_cast_fp16)[name = tensor<string, []>("op_6208_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6209_cast_fp16 = softmax(axis = var_6042, x = aw_903_cast_fp16)[name = tensor<string, []>("op_6209_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6210_cast_fp16 = softmax(axis = var_6042, x = aw_905_cast_fp16)[name = tensor<string, []>("op_6210_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6211_cast_fp16 = softmax(axis = var_6042, x = aw_907_cast_fp16)[name = tensor<string, []>("op_6211_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6212_cast_fp16 = softmax(axis = var_6042, x = aw_909_cast_fp16)[name = tensor<string, []>("op_6212_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6213_cast_fp16 = softmax(axis = var_6042, x = aw_911_cast_fp16)[name = tensor<string, []>("op_6213_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6214_cast_fp16 = softmax(axis = var_6042, x = aw_913_cast_fp16)[name = tensor<string, []>("op_6214_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6215_cast_fp16 = softmax(axis = var_6042, x = aw_915_cast_fp16)[name = tensor<string, []>("op_6215_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6216_cast_fp16 = softmax(axis = var_6042, x = aw_917_cast_fp16)[name = tensor<string, []>("op_6216_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6217_cast_fp16 = softmax(axis = var_6042, x = aw_919_cast_fp16)[name = tensor<string, []>("op_6217_cast_fp16")];
+            tensor<string, []> var_6219_equation_0 = const()[name = tensor<string, []>("op_6219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6219_cast_fp16 = einsum(equation = var_6219_equation_0, values = (var_6137_cast_fp16_0, var_6198_cast_fp16))[name = tensor<string, []>("op_6219_cast_fp16")];
+            tensor<string, []> var_6221_equation_0 = const()[name = tensor<string, []>("op_6221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6221_cast_fp16 = einsum(equation = var_6221_equation_0, values = (var_6137_cast_fp16_1, var_6199_cast_fp16))[name = tensor<string, []>("op_6221_cast_fp16")];
+            tensor<string, []> var_6223_equation_0 = const()[name = tensor<string, []>("op_6223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6223_cast_fp16 = einsum(equation = var_6223_equation_0, values = (var_6137_cast_fp16_2, var_6200_cast_fp16))[name = tensor<string, []>("op_6223_cast_fp16")];
+            tensor<string, []> var_6225_equation_0 = const()[name = tensor<string, []>("op_6225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6225_cast_fp16 = einsum(equation = var_6225_equation_0, values = (var_6137_cast_fp16_3, var_6201_cast_fp16))[name = tensor<string, []>("op_6225_cast_fp16")];
+            tensor<string, []> var_6227_equation_0 = const()[name = tensor<string, []>("op_6227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6227_cast_fp16 = einsum(equation = var_6227_equation_0, values = (var_6137_cast_fp16_4, var_6202_cast_fp16))[name = tensor<string, []>("op_6227_cast_fp16")];
+            tensor<string, []> var_6229_equation_0 = const()[name = tensor<string, []>("op_6229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6229_cast_fp16 = einsum(equation = var_6229_equation_0, values = (var_6137_cast_fp16_5, var_6203_cast_fp16))[name = tensor<string, []>("op_6229_cast_fp16")];
+            tensor<string, []> var_6231_equation_0 = const()[name = tensor<string, []>("op_6231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6231_cast_fp16 = einsum(equation = var_6231_equation_0, values = (var_6137_cast_fp16_6, var_6204_cast_fp16))[name = tensor<string, []>("op_6231_cast_fp16")];
+            tensor<string, []> var_6233_equation_0 = const()[name = tensor<string, []>("op_6233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6233_cast_fp16 = einsum(equation = var_6233_equation_0, values = (var_6137_cast_fp16_7, var_6205_cast_fp16))[name = tensor<string, []>("op_6233_cast_fp16")];
+            tensor<string, []> var_6235_equation_0 = const()[name = tensor<string, []>("op_6235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6235_cast_fp16 = einsum(equation = var_6235_equation_0, values = (var_6137_cast_fp16_8, var_6206_cast_fp16))[name = tensor<string, []>("op_6235_cast_fp16")];
+            tensor<string, []> var_6237_equation_0 = const()[name = tensor<string, []>("op_6237_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6237_cast_fp16 = einsum(equation = var_6237_equation_0, values = (var_6137_cast_fp16_9, var_6207_cast_fp16))[name = tensor<string, []>("op_6237_cast_fp16")];
+            tensor<string, []> var_6239_equation_0 = const()[name = tensor<string, []>("op_6239_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6239_cast_fp16 = einsum(equation = var_6239_equation_0, values = (var_6137_cast_fp16_10, var_6208_cast_fp16))[name = tensor<string, []>("op_6239_cast_fp16")];
+            tensor<string, []> var_6241_equation_0 = const()[name = tensor<string, []>("op_6241_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6241_cast_fp16 = einsum(equation = var_6241_equation_0, values = (var_6137_cast_fp16_11, var_6209_cast_fp16))[name = tensor<string, []>("op_6241_cast_fp16")];
+            tensor<string, []> var_6243_equation_0 = const()[name = tensor<string, []>("op_6243_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6243_cast_fp16 = einsum(equation = var_6243_equation_0, values = (var_6137_cast_fp16_12, var_6210_cast_fp16))[name = tensor<string, []>("op_6243_cast_fp16")];
+            tensor<string, []> var_6245_equation_0 = const()[name = tensor<string, []>("op_6245_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6245_cast_fp16 = einsum(equation = var_6245_equation_0, values = (var_6137_cast_fp16_13, var_6211_cast_fp16))[name = tensor<string, []>("op_6245_cast_fp16")];
+            tensor<string, []> var_6247_equation_0 = const()[name = tensor<string, []>("op_6247_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6247_cast_fp16 = einsum(equation = var_6247_equation_0, values = (var_6137_cast_fp16_14, var_6212_cast_fp16))[name = tensor<string, []>("op_6247_cast_fp16")];
+            tensor<string, []> var_6249_equation_0 = const()[name = tensor<string, []>("op_6249_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6249_cast_fp16 = einsum(equation = var_6249_equation_0, values = (var_6137_cast_fp16_15, var_6213_cast_fp16))[name = tensor<string, []>("op_6249_cast_fp16")];
+            tensor<string, []> var_6251_equation_0 = const()[name = tensor<string, []>("op_6251_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6251_cast_fp16 = einsum(equation = var_6251_equation_0, values = (var_6137_cast_fp16_16, var_6214_cast_fp16))[name = tensor<string, []>("op_6251_cast_fp16")];
+            tensor<string, []> var_6253_equation_0 = const()[name = tensor<string, []>("op_6253_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6253_cast_fp16 = einsum(equation = var_6253_equation_0, values = (var_6137_cast_fp16_17, var_6215_cast_fp16))[name = tensor<string, []>("op_6253_cast_fp16")];
+            tensor<string, []> var_6255_equation_0 = const()[name = tensor<string, []>("op_6255_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6255_cast_fp16 = einsum(equation = var_6255_equation_0, values = (var_6137_cast_fp16_18, var_6216_cast_fp16))[name = tensor<string, []>("op_6255_cast_fp16")];
+            tensor<string, []> var_6257_equation_0 = const()[name = tensor<string, []>("op_6257_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6257_cast_fp16 = einsum(equation = var_6257_equation_0, values = (var_6137_cast_fp16_19, var_6217_cast_fp16))[name = tensor<string, []>("op_6257_cast_fp16")];
+            tensor<bool, []> input_225_interleave_0 = const()[name = tensor<string, []>("input_225_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_225_cast_fp16 = concat(axis = var_6042, interleave = input_225_interleave_0, values = (var_6219_cast_fp16, var_6221_cast_fp16, var_6223_cast_fp16, var_6225_cast_fp16, var_6227_cast_fp16, var_6229_cast_fp16, var_6231_cast_fp16, var_6233_cast_fp16, var_6235_cast_fp16, var_6237_cast_fp16, var_6239_cast_fp16, var_6241_cast_fp16, var_6243_cast_fp16, var_6245_cast_fp16, var_6247_cast_fp16, var_6249_cast_fp16, var_6251_cast_fp16, var_6253_cast_fp16, var_6255_cast_fp16, var_6257_cast_fp16))[name = tensor<string, []>("input_225_cast_fp16")];
+            tensor<string, []> var_6266_pad_type_0 = const()[name = tensor<string, []>("op_6266_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6266_strides_0 = const()[name = tensor<string, []>("op_6266_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6266_pad_0 = const()[name = tensor<string, []>("op_6266_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6266_dilations_0 = const()[name = tensor<string, []>("op_6266_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6266_groups_0 = const()[name = tensor<string, []>("op_6266_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(889903552)))];
+            tensor<fp16, [1280]> blocks_22_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893180416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6266_cast_fp16 = conv(bias = blocks_22_attn_out_bias_to_fp16, dilations = var_6266_dilations_0, groups = var_6266_groups_0, pad = var_6266_pad_0, pad_type = var_6266_pad_type_0, strides = var_6266_strides_0, weight = blocks_22_attn_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("op_6266_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = var_6266_cast_fp16)[name = tensor<string, []>("inputs_91_cast_fp16")];
+            tensor<int32, [1]> input_227_axes_0 = const()[name = tensor<string, []>("input_227_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_227_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893183040)))];
+            tensor<fp16, [1280]> input_227_beta_0_to_fp16 = const()[name = tensor<string, []>("input_227_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893185664)))];
+            tensor<fp16, []> var_6276_to_fp16 = const()[name = tensor<string, []>("op_6276_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = input_227_beta_0_to_fp16, epsilon = var_6276_to_fp16, gamma = input_227_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<string, []> input_229_pad_type_0 = const()[name = tensor<string, []>("input_229_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_229_strides_0 = const()[name = tensor<string, []>("input_229_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_229_pad_0 = const()[name = tensor<string, []>("input_229_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_229_dilations_0 = const()[name = tensor<string, []>("input_229_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_229_groups_0 = const()[name = tensor<string, []>("input_229_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_22_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893188288)))];
+            tensor<fp16, [5120]> blocks_22_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906295552)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_229_cast_fp16 = conv(bias = blocks_22_mlp_0_bias_to_fp16, dilations = input_229_dilations_0, groups = input_229_groups_0, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = input_229_strides_0, weight = blocks_22_mlp_0_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<string, []> input_231_mode_0 = const()[name = tensor<string, []>("input_231_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<string, []> var_6302_pad_type_0 = const()[name = tensor<string, []>("op_6302_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6302_strides_0 = const()[name = tensor<string, []>("op_6302_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6302_pad_0 = const()[name = tensor<string, []>("op_6302_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6302_dilations_0 = const()[name = tensor<string, []>("op_6302_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6302_groups_0 = const()[name = tensor<string, []>("op_6302_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_22_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906305856)))];
+            tensor<fp16, [1280]> blocks_22_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919413120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6302_cast_fp16 = conv(bias = blocks_22_mlp_2_bias_to_fp16, dilations = var_6302_dilations_0, groups = var_6302_groups_0, pad = var_6302_pad_0, pad_type = var_6302_pad_type_0, strides = var_6302_strides_0, weight = blocks_22_mlp_2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor<string, []>("op_6302_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = var_6302_cast_fp16)[name = tensor<string, []>("inputs_93_cast_fp16")];
+            tensor<int32, []> var_6311 = const()[name = tensor<string, []>("op_6311"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_233_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_233_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919415744)))];
+            tensor<fp16, [1280]> input_233_beta_0_to_fp16 = const()[name = tensor<string, []>("input_233_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919418368)))];
+            tensor<fp16, []> var_6327_to_fp16 = const()[name = tensor<string, []>("op_6327_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = input_233_beta_0_to_fp16, epsilon = var_6327_to_fp16, gamma = input_233_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<string, []> q_47_pad_type_0 = const()[name = tensor<string, []>("q_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_47_strides_0 = const()[name = tensor<string, []>("q_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_47_pad_0 = const()[name = tensor<string, []>("q_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_47_dilations_0 = const()[name = tensor<string, []>("q_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_47_groups_0 = const()[name = tensor<string, []>("q_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6362_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6362_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919420992)))];
+            tensor<fp16, [1280]> var_6362_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6362_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(922697856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6362_cast_fp16 = conv(bias = var_6362_bias_0_to_fp16, dilations = q_47_dilations_0, groups = q_47_groups_0, pad = q_47_pad_0, pad_type = q_47_pad_type_0, strides = q_47_strides_0, weight = var_6362_weight_0_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6362_cast_fp16")];
+            tensor<string, []> k_47_pad_type_0 = const()[name = tensor<string, []>("k_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_47_strides_0 = const()[name = tensor<string, []>("k_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_47_pad_0 = const()[name = tensor<string, []>("k_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_47_dilations_0 = const()[name = tensor<string, []>("k_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_47_groups_0 = const()[name = tensor<string, []>("k_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(922700480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_47_cast_fp16 = conv(dilations = k_47_dilations_0, groups = k_47_groups_0, pad = k_47_pad_0, pad_type = k_47_pad_type_0, strides = k_47_strides_0, weight = blocks_23_attn_key_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("k_47_cast_fp16")];
+            tensor<string, []> var_6360_pad_type_0 = const()[name = tensor<string, []>("op_6360_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6360_strides_0 = const()[name = tensor<string, []>("op_6360_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6360_pad_0 = const()[name = tensor<string, []>("op_6360_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6360_dilations_0 = const()[name = tensor<string, []>("op_6360_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6360_groups_0 = const()[name = tensor<string, []>("op_6360_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(925977344)))];
+            tensor<fp16, [1280]> blocks_23_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929254208)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6360_cast_fp16 = conv(bias = blocks_23_attn_value_bias_to_fp16, dilations = var_6360_dilations_0, groups = var_6360_groups_0, pad = var_6360_pad_0, pad_type = var_6360_pad_type_0, strides = var_6360_strides_0, weight = blocks_23_attn_value_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6360_cast_fp16")];
+            tensor<int32, [20]> tile_69 = const()[name = tensor<string, []>("tile_69"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6363_axis_0 = const()[name = tensor<string, []>("op_6363_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_19 = split(axis = var_6363_axis_0, split_sizes = tile_69, x = var_6362_cast_fp16)[name = tensor<string, []>("op_6363_cast_fp16")];
+            tensor<int32, [4]> var_6384_perm_0 = const()[name = tensor<string, []>("op_6384_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_70 = const()[name = tensor<string, []>("tile_70"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6385_axis_0 = const()[name = tensor<string, []>("op_6385_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6384_cast_fp16 = transpose(perm = var_6384_perm_0, x = k_47_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_19 = split(axis = var_6385_axis_0, split_sizes = tile_70, x = var_6384_cast_fp16)[name = tensor<string, []>("op_6385_cast_fp16")];
+            tensor<int32, [20]> tile_71 = const()[name = tensor<string, []>("tile_71"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6406_axis_0 = const()[name = tensor<string, []>("op_6406_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_19 = split(axis = var_6406_axis_0, split_sizes = tile_71, x = var_6360_cast_fp16)[name = tensor<string, []>("op_6406_cast_fp16")];
+            tensor<string, []> aw_921_equation_0 = const()[name = tensor<string, []>("aw_921_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_921_cast_fp16 = einsum(equation = aw_921_equation_0, values = (var_6385_cast_fp16_0, var_6363_cast_fp16_0))[name = tensor<string, []>("aw_921_cast_fp16")];
+            tensor<string, []> aw_923_equation_0 = const()[name = tensor<string, []>("aw_923_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_923_cast_fp16 = einsum(equation = aw_923_equation_0, values = (var_6385_cast_fp16_1, var_6363_cast_fp16_1))[name = tensor<string, []>("aw_923_cast_fp16")];
+            tensor<string, []> aw_925_equation_0 = const()[name = tensor<string, []>("aw_925_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_925_cast_fp16 = einsum(equation = aw_925_equation_0, values = (var_6385_cast_fp16_2, var_6363_cast_fp16_2))[name = tensor<string, []>("aw_925_cast_fp16")];
+            tensor<string, []> aw_927_equation_0 = const()[name = tensor<string, []>("aw_927_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_927_cast_fp16 = einsum(equation = aw_927_equation_0, values = (var_6385_cast_fp16_3, var_6363_cast_fp16_3))[name = tensor<string, []>("aw_927_cast_fp16")];
+            tensor<string, []> aw_929_equation_0 = const()[name = tensor<string, []>("aw_929_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_929_cast_fp16 = einsum(equation = aw_929_equation_0, values = (var_6385_cast_fp16_4, var_6363_cast_fp16_4))[name = tensor<string, []>("aw_929_cast_fp16")];
+            tensor<string, []> aw_931_equation_0 = const()[name = tensor<string, []>("aw_931_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_931_cast_fp16 = einsum(equation = aw_931_equation_0, values = (var_6385_cast_fp16_5, var_6363_cast_fp16_5))[name = tensor<string, []>("aw_931_cast_fp16")];
+            tensor<string, []> aw_933_equation_0 = const()[name = tensor<string, []>("aw_933_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_933_cast_fp16 = einsum(equation = aw_933_equation_0, values = (var_6385_cast_fp16_6, var_6363_cast_fp16_6))[name = tensor<string, []>("aw_933_cast_fp16")];
+            tensor<string, []> aw_935_equation_0 = const()[name = tensor<string, []>("aw_935_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_935_cast_fp16 = einsum(equation = aw_935_equation_0, values = (var_6385_cast_fp16_7, var_6363_cast_fp16_7))[name = tensor<string, []>("aw_935_cast_fp16")];
+            tensor<string, []> aw_937_equation_0 = const()[name = tensor<string, []>("aw_937_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_937_cast_fp16 = einsum(equation = aw_937_equation_0, values = (var_6385_cast_fp16_8, var_6363_cast_fp16_8))[name = tensor<string, []>("aw_937_cast_fp16")];
+            tensor<string, []> aw_939_equation_0 = const()[name = tensor<string, []>("aw_939_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_939_cast_fp16 = einsum(equation = aw_939_equation_0, values = (var_6385_cast_fp16_9, var_6363_cast_fp16_9))[name = tensor<string, []>("aw_939_cast_fp16")];
+            tensor<string, []> aw_941_equation_0 = const()[name = tensor<string, []>("aw_941_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_941_cast_fp16 = einsum(equation = aw_941_equation_0, values = (var_6385_cast_fp16_10, var_6363_cast_fp16_10))[name = tensor<string, []>("aw_941_cast_fp16")];
+            tensor<string, []> aw_943_equation_0 = const()[name = tensor<string, []>("aw_943_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_943_cast_fp16 = einsum(equation = aw_943_equation_0, values = (var_6385_cast_fp16_11, var_6363_cast_fp16_11))[name = tensor<string, []>("aw_943_cast_fp16")];
+            tensor<string, []> aw_945_equation_0 = const()[name = tensor<string, []>("aw_945_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_945_cast_fp16 = einsum(equation = aw_945_equation_0, values = (var_6385_cast_fp16_12, var_6363_cast_fp16_12))[name = tensor<string, []>("aw_945_cast_fp16")];
+            tensor<string, []> aw_947_equation_0 = const()[name = tensor<string, []>("aw_947_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_947_cast_fp16 = einsum(equation = aw_947_equation_0, values = (var_6385_cast_fp16_13, var_6363_cast_fp16_13))[name = tensor<string, []>("aw_947_cast_fp16")];
+            tensor<string, []> aw_949_equation_0 = const()[name = tensor<string, []>("aw_949_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_949_cast_fp16 = einsum(equation = aw_949_equation_0, values = (var_6385_cast_fp16_14, var_6363_cast_fp16_14))[name = tensor<string, []>("aw_949_cast_fp16")];
+            tensor<string, []> aw_951_equation_0 = const()[name = tensor<string, []>("aw_951_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_951_cast_fp16 = einsum(equation = aw_951_equation_0, values = (var_6385_cast_fp16_15, var_6363_cast_fp16_15))[name = tensor<string, []>("aw_951_cast_fp16")];
+            tensor<string, []> aw_953_equation_0 = const()[name = tensor<string, []>("aw_953_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_953_cast_fp16 = einsum(equation = aw_953_equation_0, values = (var_6385_cast_fp16_16, var_6363_cast_fp16_16))[name = tensor<string, []>("aw_953_cast_fp16")];
+            tensor<string, []> aw_955_equation_0 = const()[name = tensor<string, []>("aw_955_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_955_cast_fp16 = einsum(equation = aw_955_equation_0, values = (var_6385_cast_fp16_17, var_6363_cast_fp16_17))[name = tensor<string, []>("aw_955_cast_fp16")];
+            tensor<string, []> aw_957_equation_0 = const()[name = tensor<string, []>("aw_957_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_957_cast_fp16 = einsum(equation = aw_957_equation_0, values = (var_6385_cast_fp16_18, var_6363_cast_fp16_18))[name = tensor<string, []>("aw_957_cast_fp16")];
+            tensor<string, []> aw_959_equation_0 = const()[name = tensor<string, []>("aw_959_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_959_cast_fp16 = einsum(equation = aw_959_equation_0, values = (var_6385_cast_fp16_19, var_6363_cast_fp16_19))[name = tensor<string, []>("aw_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6467_cast_fp16 = softmax(axis = var_6311, x = aw_921_cast_fp16)[name = tensor<string, []>("op_6467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6468_cast_fp16 = softmax(axis = var_6311, x = aw_923_cast_fp16)[name = tensor<string, []>("op_6468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6469_cast_fp16 = softmax(axis = var_6311, x = aw_925_cast_fp16)[name = tensor<string, []>("op_6469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6470_cast_fp16 = softmax(axis = var_6311, x = aw_927_cast_fp16)[name = tensor<string, []>("op_6470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6471_cast_fp16 = softmax(axis = var_6311, x = aw_929_cast_fp16)[name = tensor<string, []>("op_6471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6472_cast_fp16 = softmax(axis = var_6311, x = aw_931_cast_fp16)[name = tensor<string, []>("op_6472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6473_cast_fp16 = softmax(axis = var_6311, x = aw_933_cast_fp16)[name = tensor<string, []>("op_6473_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6474_cast_fp16 = softmax(axis = var_6311, x = aw_935_cast_fp16)[name = tensor<string, []>("op_6474_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6475_cast_fp16 = softmax(axis = var_6311, x = aw_937_cast_fp16)[name = tensor<string, []>("op_6475_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6476_cast_fp16 = softmax(axis = var_6311, x = aw_939_cast_fp16)[name = tensor<string, []>("op_6476_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6477_cast_fp16 = softmax(axis = var_6311, x = aw_941_cast_fp16)[name = tensor<string, []>("op_6477_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6478_cast_fp16 = softmax(axis = var_6311, x = aw_943_cast_fp16)[name = tensor<string, []>("op_6478_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6479_cast_fp16 = softmax(axis = var_6311, x = aw_945_cast_fp16)[name = tensor<string, []>("op_6479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6480_cast_fp16 = softmax(axis = var_6311, x = aw_947_cast_fp16)[name = tensor<string, []>("op_6480_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6481_cast_fp16 = softmax(axis = var_6311, x = aw_949_cast_fp16)[name = tensor<string, []>("op_6481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6482_cast_fp16 = softmax(axis = var_6311, x = aw_951_cast_fp16)[name = tensor<string, []>("op_6482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6483_cast_fp16 = softmax(axis = var_6311, x = aw_953_cast_fp16)[name = tensor<string, []>("op_6483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6484_cast_fp16 = softmax(axis = var_6311, x = aw_955_cast_fp16)[name = tensor<string, []>("op_6484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6485_cast_fp16 = softmax(axis = var_6311, x = aw_957_cast_fp16)[name = tensor<string, []>("op_6485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6486_cast_fp16 = softmax(axis = var_6311, x = aw_959_cast_fp16)[name = tensor<string, []>("op_6486_cast_fp16")];
+            tensor<string, []> var_6488_equation_0 = const()[name = tensor<string, []>("op_6488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6488_cast_fp16 = einsum(equation = var_6488_equation_0, values = (var_6406_cast_fp16_0, var_6467_cast_fp16))[name = tensor<string, []>("op_6488_cast_fp16")];
+            tensor<string, []> var_6490_equation_0 = const()[name = tensor<string, []>("op_6490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6490_cast_fp16 = einsum(equation = var_6490_equation_0, values = (var_6406_cast_fp16_1, var_6468_cast_fp16))[name = tensor<string, []>("op_6490_cast_fp16")];
+            tensor<string, []> var_6492_equation_0 = const()[name = tensor<string, []>("op_6492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6492_cast_fp16 = einsum(equation = var_6492_equation_0, values = (var_6406_cast_fp16_2, var_6469_cast_fp16))[name = tensor<string, []>("op_6492_cast_fp16")];
+            tensor<string, []> var_6494_equation_0 = const()[name = tensor<string, []>("op_6494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6494_cast_fp16 = einsum(equation = var_6494_equation_0, values = (var_6406_cast_fp16_3, var_6470_cast_fp16))[name = tensor<string, []>("op_6494_cast_fp16")];
+            tensor<string, []> var_6496_equation_0 = const()[name = tensor<string, []>("op_6496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6496_cast_fp16 = einsum(equation = var_6496_equation_0, values = (var_6406_cast_fp16_4, var_6471_cast_fp16))[name = tensor<string, []>("op_6496_cast_fp16")];
+            tensor<string, []> var_6498_equation_0 = const()[name = tensor<string, []>("op_6498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6498_cast_fp16 = einsum(equation = var_6498_equation_0, values = (var_6406_cast_fp16_5, var_6472_cast_fp16))[name = tensor<string, []>("op_6498_cast_fp16")];
+            tensor<string, []> var_6500_equation_0 = const()[name = tensor<string, []>("op_6500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6500_cast_fp16 = einsum(equation = var_6500_equation_0, values = (var_6406_cast_fp16_6, var_6473_cast_fp16))[name = tensor<string, []>("op_6500_cast_fp16")];
+            tensor<string, []> var_6502_equation_0 = const()[name = tensor<string, []>("op_6502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6502_cast_fp16 = einsum(equation = var_6502_equation_0, values = (var_6406_cast_fp16_7, var_6474_cast_fp16))[name = tensor<string, []>("op_6502_cast_fp16")];
+            tensor<string, []> var_6504_equation_0 = const()[name = tensor<string, []>("op_6504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6504_cast_fp16 = einsum(equation = var_6504_equation_0, values = (var_6406_cast_fp16_8, var_6475_cast_fp16))[name = tensor<string, []>("op_6504_cast_fp16")];
+            tensor<string, []> var_6506_equation_0 = const()[name = tensor<string, []>("op_6506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6506_cast_fp16 = einsum(equation = var_6506_equation_0, values = (var_6406_cast_fp16_9, var_6476_cast_fp16))[name = tensor<string, []>("op_6506_cast_fp16")];
+            tensor<string, []> var_6508_equation_0 = const()[name = tensor<string, []>("op_6508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6508_cast_fp16 = einsum(equation = var_6508_equation_0, values = (var_6406_cast_fp16_10, var_6477_cast_fp16))[name = tensor<string, []>("op_6508_cast_fp16")];
+            tensor<string, []> var_6510_equation_0 = const()[name = tensor<string, []>("op_6510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6510_cast_fp16 = einsum(equation = var_6510_equation_0, values = (var_6406_cast_fp16_11, var_6478_cast_fp16))[name = tensor<string, []>("op_6510_cast_fp16")];
+            tensor<string, []> var_6512_equation_0 = const()[name = tensor<string, []>("op_6512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6512_cast_fp16 = einsum(equation = var_6512_equation_0, values = (var_6406_cast_fp16_12, var_6479_cast_fp16))[name = tensor<string, []>("op_6512_cast_fp16")];
+            tensor<string, []> var_6514_equation_0 = const()[name = tensor<string, []>("op_6514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6514_cast_fp16 = einsum(equation = var_6514_equation_0, values = (var_6406_cast_fp16_13, var_6480_cast_fp16))[name = tensor<string, []>("op_6514_cast_fp16")];
+            tensor<string, []> var_6516_equation_0 = const()[name = tensor<string, []>("op_6516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6516_cast_fp16 = einsum(equation = var_6516_equation_0, values = (var_6406_cast_fp16_14, var_6481_cast_fp16))[name = tensor<string, []>("op_6516_cast_fp16")];
+            tensor<string, []> var_6518_equation_0 = const()[name = tensor<string, []>("op_6518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6518_cast_fp16 = einsum(equation = var_6518_equation_0, values = (var_6406_cast_fp16_15, var_6482_cast_fp16))[name = tensor<string, []>("op_6518_cast_fp16")];
+            tensor<string, []> var_6520_equation_0 = const()[name = tensor<string, []>("op_6520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6520_cast_fp16 = einsum(equation = var_6520_equation_0, values = (var_6406_cast_fp16_16, var_6483_cast_fp16))[name = tensor<string, []>("op_6520_cast_fp16")];
+            tensor<string, []> var_6522_equation_0 = const()[name = tensor<string, []>("op_6522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6522_cast_fp16 = einsum(equation = var_6522_equation_0, values = (var_6406_cast_fp16_17, var_6484_cast_fp16))[name = tensor<string, []>("op_6522_cast_fp16")];
+            tensor<string, []> var_6524_equation_0 = const()[name = tensor<string, []>("op_6524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6524_cast_fp16 = einsum(equation = var_6524_equation_0, values = (var_6406_cast_fp16_18, var_6485_cast_fp16))[name = tensor<string, []>("op_6524_cast_fp16")];
+            tensor<string, []> var_6526_equation_0 = const()[name = tensor<string, []>("op_6526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6526_cast_fp16 = einsum(equation = var_6526_equation_0, values = (var_6406_cast_fp16_19, var_6486_cast_fp16))[name = tensor<string, []>("op_6526_cast_fp16")];
+            tensor<bool, []> input_235_interleave_0 = const()[name = tensor<string, []>("input_235_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_235_cast_fp16 = concat(axis = var_6311, interleave = input_235_interleave_0, values = (var_6488_cast_fp16, var_6490_cast_fp16, var_6492_cast_fp16, var_6494_cast_fp16, var_6496_cast_fp16, var_6498_cast_fp16, var_6500_cast_fp16, var_6502_cast_fp16, var_6504_cast_fp16, var_6506_cast_fp16, var_6508_cast_fp16, var_6510_cast_fp16, var_6512_cast_fp16, var_6514_cast_fp16, var_6516_cast_fp16, var_6518_cast_fp16, var_6520_cast_fp16, var_6522_cast_fp16, var_6524_cast_fp16, var_6526_cast_fp16))[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<string, []> var_6535_pad_type_0 = const()[name = tensor<string, []>("op_6535_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6535_strides_0 = const()[name = tensor<string, []>("op_6535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6535_pad_0 = const()[name = tensor<string, []>("op_6535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6535_dilations_0 = const()[name = tensor<string, []>("op_6535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6535_groups_0 = const()[name = tensor<string, []>("op_6535_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929256832)))];
+            tensor<fp16, [1280]> blocks_23_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932533696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6535_cast_fp16 = conv(bias = blocks_23_attn_out_bias_to_fp16, dilations = var_6535_dilations_0, groups = var_6535_groups_0, pad = var_6535_pad_0, pad_type = var_6535_pad_type_0, strides = var_6535_strides_0, weight = blocks_23_attn_out_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("op_6535_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = var_6535_cast_fp16)[name = tensor<string, []>("inputs_95_cast_fp16")];
+            tensor<int32, [1]> input_237_axes_0 = const()[name = tensor<string, []>("input_237_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_237_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_237_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932536320)))];
+            tensor<fp16, [1280]> input_237_beta_0_to_fp16 = const()[name = tensor<string, []>("input_237_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932538944)))];
+            tensor<fp16, []> var_6545_to_fp16 = const()[name = tensor<string, []>("op_6545_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = input_237_beta_0_to_fp16, epsilon = var_6545_to_fp16, gamma = input_237_gamma_0_to_fp16, x = inputs_95_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_23_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932541568)))];
+            tensor<fp16, [5120]> blocks_23_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(945648832)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_239_cast_fp16 = conv(bias = blocks_23_mlp_0_bias_to_fp16, dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = blocks_23_mlp_0_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<string, []> input_241_mode_0 = const()[name = tensor<string, []>("input_241_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_241_cast_fp16 = gelu(mode = input_241_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
+            tensor<string, []> var_6571_pad_type_0 = const()[name = tensor<string, []>("op_6571_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6571_strides_0 = const()[name = tensor<string, []>("op_6571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6571_pad_0 = const()[name = tensor<string, []>("op_6571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6571_dilations_0 = const()[name = tensor<string, []>("op_6571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6571_groups_0 = const()[name = tensor<string, []>("op_6571_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_23_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(945659136)))];
+            tensor<fp16, [1280]> blocks_23_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958766400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6571_cast_fp16 = conv(bias = blocks_23_mlp_2_bias_to_fp16, dilations = var_6571_dilations_0, groups = var_6571_groups_0, pad = var_6571_pad_0, pad_type = var_6571_pad_type_0, strides = var_6571_strides_0, weight = blocks_23_mlp_2_weight_to_fp16, x = input_241_cast_fp16)[name = tensor<string, []>("op_6571_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_6571_cast_fp16)[name = tensor<string, []>("inputs_97_cast_fp16")];
+            tensor<int32, []> var_6580 = const()[name = tensor<string, []>("op_6580"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_243_axes_0 = const()[name = tensor<string, []>("input_243_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_243_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_243_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958769024)))];
+            tensor<fp16, [1280]> input_243_beta_0_to_fp16 = const()[name = tensor<string, []>("input_243_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958771648)))];
+            tensor<fp16, []> var_6596_to_fp16 = const()[name = tensor<string, []>("op_6596_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_243_cast_fp16 = layer_norm(axes = input_243_axes_0, beta = input_243_beta_0_to_fp16, epsilon = var_6596_to_fp16, gamma = input_243_gamma_0_to_fp16, x = inputs_97_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
+            tensor<string, []> q_49_pad_type_0 = const()[name = tensor<string, []>("q_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_49_strides_0 = const()[name = tensor<string, []>("q_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_49_pad_0 = const()[name = tensor<string, []>("q_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_49_dilations_0 = const()[name = tensor<string, []>("q_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_49_groups_0 = const()[name = tensor<string, []>("q_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6631_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6631_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958774272)))];
+            tensor<fp16, [1280]> var_6631_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6631_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962051136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6631_cast_fp16 = conv(bias = var_6631_bias_0_to_fp16, dilations = q_49_dilations_0, groups = q_49_groups_0, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = q_49_strides_0, weight = var_6631_weight_0_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6631_cast_fp16")];
+            tensor<string, []> k_49_pad_type_0 = const()[name = tensor<string, []>("k_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_49_strides_0 = const()[name = tensor<string, []>("k_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_49_pad_0 = const()[name = tensor<string, []>("k_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_49_dilations_0 = const()[name = tensor<string, []>("k_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_49_groups_0 = const()[name = tensor<string, []>("k_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962053760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_49_cast_fp16 = conv(dilations = k_49_dilations_0, groups = k_49_groups_0, pad = k_49_pad_0, pad_type = k_49_pad_type_0, strides = k_49_strides_0, weight = blocks_24_attn_key_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("k_49_cast_fp16")];
+            tensor<string, []> var_6629_pad_type_0 = const()[name = tensor<string, []>("op_6629_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6629_strides_0 = const()[name = tensor<string, []>("op_6629_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6629_pad_0 = const()[name = tensor<string, []>("op_6629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6629_dilations_0 = const()[name = tensor<string, []>("op_6629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6629_groups_0 = const()[name = tensor<string, []>("op_6629_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(965330624)))];
+            tensor<fp16, [1280]> blocks_24_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968607488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6629_cast_fp16 = conv(bias = blocks_24_attn_value_bias_to_fp16, dilations = var_6629_dilations_0, groups = var_6629_groups_0, pad = var_6629_pad_0, pad_type = var_6629_pad_type_0, strides = var_6629_strides_0, weight = blocks_24_attn_value_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6629_cast_fp16")];
+            tensor<int32, [20]> tile_72 = const()[name = tensor<string, []>("tile_72"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6632_axis_0 = const()[name = tensor<string, []>("op_6632_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_19 = split(axis = var_6632_axis_0, split_sizes = tile_72, x = var_6631_cast_fp16)[name = tensor<string, []>("op_6632_cast_fp16")];
+            tensor<int32, [4]> var_6653_perm_0 = const()[name = tensor<string, []>("op_6653_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_73 = const()[name = tensor<string, []>("tile_73"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6654_axis_0 = const()[name = tensor<string, []>("op_6654_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6653_cast_fp16 = transpose(perm = var_6653_perm_0, x = k_49_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_19 = split(axis = var_6654_axis_0, split_sizes = tile_73, x = var_6653_cast_fp16)[name = tensor<string, []>("op_6654_cast_fp16")];
+            tensor<int32, [20]> tile_74 = const()[name = tensor<string, []>("tile_74"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6675_axis_0 = const()[name = tensor<string, []>("op_6675_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_19 = split(axis = var_6675_axis_0, split_sizes = tile_74, x = var_6629_cast_fp16)[name = tensor<string, []>("op_6675_cast_fp16")];
+            tensor<string, []> aw_961_equation_0 = const()[name = tensor<string, []>("aw_961_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_961_cast_fp16 = einsum(equation = aw_961_equation_0, values = (var_6654_cast_fp16_0, var_6632_cast_fp16_0))[name = tensor<string, []>("aw_961_cast_fp16")];
+            tensor<string, []> aw_963_equation_0 = const()[name = tensor<string, []>("aw_963_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_963_cast_fp16 = einsum(equation = aw_963_equation_0, values = (var_6654_cast_fp16_1, var_6632_cast_fp16_1))[name = tensor<string, []>("aw_963_cast_fp16")];
+            tensor<string, []> aw_965_equation_0 = const()[name = tensor<string, []>("aw_965_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_965_cast_fp16 = einsum(equation = aw_965_equation_0, values = (var_6654_cast_fp16_2, var_6632_cast_fp16_2))[name = tensor<string, []>("aw_965_cast_fp16")];
+            tensor<string, []> aw_967_equation_0 = const()[name = tensor<string, []>("aw_967_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_967_cast_fp16 = einsum(equation = aw_967_equation_0, values = (var_6654_cast_fp16_3, var_6632_cast_fp16_3))[name = tensor<string, []>("aw_967_cast_fp16")];
+            tensor<string, []> aw_969_equation_0 = const()[name = tensor<string, []>("aw_969_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_969_cast_fp16 = einsum(equation = aw_969_equation_0, values = (var_6654_cast_fp16_4, var_6632_cast_fp16_4))[name = tensor<string, []>("aw_969_cast_fp16")];
+            tensor<string, []> aw_971_equation_0 = const()[name = tensor<string, []>("aw_971_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_971_cast_fp16 = einsum(equation = aw_971_equation_0, values = (var_6654_cast_fp16_5, var_6632_cast_fp16_5))[name = tensor<string, []>("aw_971_cast_fp16")];
+            tensor<string, []> aw_973_equation_0 = const()[name = tensor<string, []>("aw_973_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_973_cast_fp16 = einsum(equation = aw_973_equation_0, values = (var_6654_cast_fp16_6, var_6632_cast_fp16_6))[name = tensor<string, []>("aw_973_cast_fp16")];
+            tensor<string, []> aw_975_equation_0 = const()[name = tensor<string, []>("aw_975_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_975_cast_fp16 = einsum(equation = aw_975_equation_0, values = (var_6654_cast_fp16_7, var_6632_cast_fp16_7))[name = tensor<string, []>("aw_975_cast_fp16")];
+            tensor<string, []> aw_977_equation_0 = const()[name = tensor<string, []>("aw_977_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_977_cast_fp16 = einsum(equation = aw_977_equation_0, values = (var_6654_cast_fp16_8, var_6632_cast_fp16_8))[name = tensor<string, []>("aw_977_cast_fp16")];
+            tensor<string, []> aw_979_equation_0 = const()[name = tensor<string, []>("aw_979_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_979_cast_fp16 = einsum(equation = aw_979_equation_0, values = (var_6654_cast_fp16_9, var_6632_cast_fp16_9))[name = tensor<string, []>("aw_979_cast_fp16")];
+            tensor<string, []> aw_981_equation_0 = const()[name = tensor<string, []>("aw_981_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_981_cast_fp16 = einsum(equation = aw_981_equation_0, values = (var_6654_cast_fp16_10, var_6632_cast_fp16_10))[name = tensor<string, []>("aw_981_cast_fp16")];
+            tensor<string, []> aw_983_equation_0 = const()[name = tensor<string, []>("aw_983_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_983_cast_fp16 = einsum(equation = aw_983_equation_0, values = (var_6654_cast_fp16_11, var_6632_cast_fp16_11))[name = tensor<string, []>("aw_983_cast_fp16")];
+            tensor<string, []> aw_985_equation_0 = const()[name = tensor<string, []>("aw_985_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_985_cast_fp16 = einsum(equation = aw_985_equation_0, values = (var_6654_cast_fp16_12, var_6632_cast_fp16_12))[name = tensor<string, []>("aw_985_cast_fp16")];
+            tensor<string, []> aw_987_equation_0 = const()[name = tensor<string, []>("aw_987_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_987_cast_fp16 = einsum(equation = aw_987_equation_0, values = (var_6654_cast_fp16_13, var_6632_cast_fp16_13))[name = tensor<string, []>("aw_987_cast_fp16")];
+            tensor<string, []> aw_989_equation_0 = const()[name = tensor<string, []>("aw_989_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_989_cast_fp16 = einsum(equation = aw_989_equation_0, values = (var_6654_cast_fp16_14, var_6632_cast_fp16_14))[name = tensor<string, []>("aw_989_cast_fp16")];
+            tensor<string, []> aw_991_equation_0 = const()[name = tensor<string, []>("aw_991_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_991_cast_fp16 = einsum(equation = aw_991_equation_0, values = (var_6654_cast_fp16_15, var_6632_cast_fp16_15))[name = tensor<string, []>("aw_991_cast_fp16")];
+            tensor<string, []> aw_993_equation_0 = const()[name = tensor<string, []>("aw_993_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_993_cast_fp16 = einsum(equation = aw_993_equation_0, values = (var_6654_cast_fp16_16, var_6632_cast_fp16_16))[name = tensor<string, []>("aw_993_cast_fp16")];
+            tensor<string, []> aw_995_equation_0 = const()[name = tensor<string, []>("aw_995_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_995_cast_fp16 = einsum(equation = aw_995_equation_0, values = (var_6654_cast_fp16_17, var_6632_cast_fp16_17))[name = tensor<string, []>("aw_995_cast_fp16")];
+            tensor<string, []> aw_997_equation_0 = const()[name = tensor<string, []>("aw_997_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_997_cast_fp16 = einsum(equation = aw_997_equation_0, values = (var_6654_cast_fp16_18, var_6632_cast_fp16_18))[name = tensor<string, []>("aw_997_cast_fp16")];
+            tensor<string, []> aw_999_equation_0 = const()[name = tensor<string, []>("aw_999_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_999_cast_fp16 = einsum(equation = aw_999_equation_0, values = (var_6654_cast_fp16_19, var_6632_cast_fp16_19))[name = tensor<string, []>("aw_999_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6736_cast_fp16 = softmax(axis = var_6580, x = aw_961_cast_fp16)[name = tensor<string, []>("op_6736_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6737_cast_fp16 = softmax(axis = var_6580, x = aw_963_cast_fp16)[name = tensor<string, []>("op_6737_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6738_cast_fp16 = softmax(axis = var_6580, x = aw_965_cast_fp16)[name = tensor<string, []>("op_6738_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6739_cast_fp16 = softmax(axis = var_6580, x = aw_967_cast_fp16)[name = tensor<string, []>("op_6739_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6740_cast_fp16 = softmax(axis = var_6580, x = aw_969_cast_fp16)[name = tensor<string, []>("op_6740_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6741_cast_fp16 = softmax(axis = var_6580, x = aw_971_cast_fp16)[name = tensor<string, []>("op_6741_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6742_cast_fp16 = softmax(axis = var_6580, x = aw_973_cast_fp16)[name = tensor<string, []>("op_6742_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6743_cast_fp16 = softmax(axis = var_6580, x = aw_975_cast_fp16)[name = tensor<string, []>("op_6743_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6744_cast_fp16 = softmax(axis = var_6580, x = aw_977_cast_fp16)[name = tensor<string, []>("op_6744_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6745_cast_fp16 = softmax(axis = var_6580, x = aw_979_cast_fp16)[name = tensor<string, []>("op_6745_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6746_cast_fp16 = softmax(axis = var_6580, x = aw_981_cast_fp16)[name = tensor<string, []>("op_6746_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6747_cast_fp16 = softmax(axis = var_6580, x = aw_983_cast_fp16)[name = tensor<string, []>("op_6747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6748_cast_fp16 = softmax(axis = var_6580, x = aw_985_cast_fp16)[name = tensor<string, []>("op_6748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6749_cast_fp16 = softmax(axis = var_6580, x = aw_987_cast_fp16)[name = tensor<string, []>("op_6749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6750_cast_fp16 = softmax(axis = var_6580, x = aw_989_cast_fp16)[name = tensor<string, []>("op_6750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6751_cast_fp16 = softmax(axis = var_6580, x = aw_991_cast_fp16)[name = tensor<string, []>("op_6751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6752_cast_fp16 = softmax(axis = var_6580, x = aw_993_cast_fp16)[name = tensor<string, []>("op_6752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6753_cast_fp16 = softmax(axis = var_6580, x = aw_995_cast_fp16)[name = tensor<string, []>("op_6753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6754_cast_fp16 = softmax(axis = var_6580, x = aw_997_cast_fp16)[name = tensor<string, []>("op_6754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6755_cast_fp16 = softmax(axis = var_6580, x = aw_999_cast_fp16)[name = tensor<string, []>("op_6755_cast_fp16")];
+            tensor<string, []> var_6757_equation_0 = const()[name = tensor<string, []>("op_6757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6675_cast_fp16_0, var_6736_cast_fp16))[name = tensor<string, []>("op_6757_cast_fp16")];
+            tensor<string, []> var_6759_equation_0 = const()[name = tensor<string, []>("op_6759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6759_cast_fp16 = einsum(equation = var_6759_equation_0, values = (var_6675_cast_fp16_1, var_6737_cast_fp16))[name = tensor<string, []>("op_6759_cast_fp16")];
+            tensor<string, []> var_6761_equation_0 = const()[name = tensor<string, []>("op_6761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6675_cast_fp16_2, var_6738_cast_fp16))[name = tensor<string, []>("op_6761_cast_fp16")];
+            tensor<string, []> var_6763_equation_0 = const()[name = tensor<string, []>("op_6763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6763_cast_fp16 = einsum(equation = var_6763_equation_0, values = (var_6675_cast_fp16_3, var_6739_cast_fp16))[name = tensor<string, []>("op_6763_cast_fp16")];
+            tensor<string, []> var_6765_equation_0 = const()[name = tensor<string, []>("op_6765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6675_cast_fp16_4, var_6740_cast_fp16))[name = tensor<string, []>("op_6765_cast_fp16")];
+            tensor<string, []> var_6767_equation_0 = const()[name = tensor<string, []>("op_6767_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6767_cast_fp16 = einsum(equation = var_6767_equation_0, values = (var_6675_cast_fp16_5, var_6741_cast_fp16))[name = tensor<string, []>("op_6767_cast_fp16")];
+            tensor<string, []> var_6769_equation_0 = const()[name = tensor<string, []>("op_6769_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6675_cast_fp16_6, var_6742_cast_fp16))[name = tensor<string, []>("op_6769_cast_fp16")];
+            tensor<string, []> var_6771_equation_0 = const()[name = tensor<string, []>("op_6771_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6771_cast_fp16 = einsum(equation = var_6771_equation_0, values = (var_6675_cast_fp16_7, var_6743_cast_fp16))[name = tensor<string, []>("op_6771_cast_fp16")];
+            tensor<string, []> var_6773_equation_0 = const()[name = tensor<string, []>("op_6773_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6675_cast_fp16_8, var_6744_cast_fp16))[name = tensor<string, []>("op_6773_cast_fp16")];
+            tensor<string, []> var_6775_equation_0 = const()[name = tensor<string, []>("op_6775_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6775_cast_fp16 = einsum(equation = var_6775_equation_0, values = (var_6675_cast_fp16_9, var_6745_cast_fp16))[name = tensor<string, []>("op_6775_cast_fp16")];
+            tensor<string, []> var_6777_equation_0 = const()[name = tensor<string, []>("op_6777_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6777_cast_fp16 = einsum(equation = var_6777_equation_0, values = (var_6675_cast_fp16_10, var_6746_cast_fp16))[name = tensor<string, []>("op_6777_cast_fp16")];
+            tensor<string, []> var_6779_equation_0 = const()[name = tensor<string, []>("op_6779_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6779_cast_fp16 = einsum(equation = var_6779_equation_0, values = (var_6675_cast_fp16_11, var_6747_cast_fp16))[name = tensor<string, []>("op_6779_cast_fp16")];
+            tensor<string, []> var_6781_equation_0 = const()[name = tensor<string, []>("op_6781_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6781_cast_fp16 = einsum(equation = var_6781_equation_0, values = (var_6675_cast_fp16_12, var_6748_cast_fp16))[name = tensor<string, []>("op_6781_cast_fp16")];
+            tensor<string, []> var_6783_equation_0 = const()[name = tensor<string, []>("op_6783_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6783_cast_fp16 = einsum(equation = var_6783_equation_0, values = (var_6675_cast_fp16_13, var_6749_cast_fp16))[name = tensor<string, []>("op_6783_cast_fp16")];
+            tensor<string, []> var_6785_equation_0 = const()[name = tensor<string, []>("op_6785_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6785_cast_fp16 = einsum(equation = var_6785_equation_0, values = (var_6675_cast_fp16_14, var_6750_cast_fp16))[name = tensor<string, []>("op_6785_cast_fp16")];
+            tensor<string, []> var_6787_equation_0 = const()[name = tensor<string, []>("op_6787_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6787_cast_fp16 = einsum(equation = var_6787_equation_0, values = (var_6675_cast_fp16_15, var_6751_cast_fp16))[name = tensor<string, []>("op_6787_cast_fp16")];
+            tensor<string, []> var_6789_equation_0 = const()[name = tensor<string, []>("op_6789_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6789_cast_fp16 = einsum(equation = var_6789_equation_0, values = (var_6675_cast_fp16_16, var_6752_cast_fp16))[name = tensor<string, []>("op_6789_cast_fp16")];
+            tensor<string, []> var_6791_equation_0 = const()[name = tensor<string, []>("op_6791_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6791_cast_fp16 = einsum(equation = var_6791_equation_0, values = (var_6675_cast_fp16_17, var_6753_cast_fp16))[name = tensor<string, []>("op_6791_cast_fp16")];
+            tensor<string, []> var_6793_equation_0 = const()[name = tensor<string, []>("op_6793_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6793_cast_fp16 = einsum(equation = var_6793_equation_0, values = (var_6675_cast_fp16_18, var_6754_cast_fp16))[name = tensor<string, []>("op_6793_cast_fp16")];
+            tensor<string, []> var_6795_equation_0 = const()[name = tensor<string, []>("op_6795_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6795_cast_fp16 = einsum(equation = var_6795_equation_0, values = (var_6675_cast_fp16_19, var_6755_cast_fp16))[name = tensor<string, []>("op_6795_cast_fp16")];
+            tensor<bool, []> input_245_interleave_0 = const()[name = tensor<string, []>("input_245_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_245_cast_fp16 = concat(axis = var_6580, interleave = input_245_interleave_0, values = (var_6757_cast_fp16, var_6759_cast_fp16, var_6761_cast_fp16, var_6763_cast_fp16, var_6765_cast_fp16, var_6767_cast_fp16, var_6769_cast_fp16, var_6771_cast_fp16, var_6773_cast_fp16, var_6775_cast_fp16, var_6777_cast_fp16, var_6779_cast_fp16, var_6781_cast_fp16, var_6783_cast_fp16, var_6785_cast_fp16, var_6787_cast_fp16, var_6789_cast_fp16, var_6791_cast_fp16, var_6793_cast_fp16, var_6795_cast_fp16))[name = tensor<string, []>("input_245_cast_fp16")];
+            tensor<string, []> var_6804_pad_type_0 = const()[name = tensor<string, []>("op_6804_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6804_strides_0 = const()[name = tensor<string, []>("op_6804_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6804_pad_0 = const()[name = tensor<string, []>("op_6804_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6804_dilations_0 = const()[name = tensor<string, []>("op_6804_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6804_groups_0 = const()[name = tensor<string, []>("op_6804_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968610112)))];
+            tensor<fp16, [1280]> blocks_24_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971886976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6804_cast_fp16 = conv(bias = blocks_24_attn_out_bias_to_fp16, dilations = var_6804_dilations_0, groups = var_6804_groups_0, pad = var_6804_pad_0, pad_type = var_6804_pad_type_0, strides = var_6804_strides_0, weight = blocks_24_attn_out_weight_to_fp16, x = input_245_cast_fp16)[name = tensor<string, []>("op_6804_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = var_6804_cast_fp16)[name = tensor<string, []>("inputs_99_cast_fp16")];
+            tensor<int32, [1]> input_247_axes_0 = const()[name = tensor<string, []>("input_247_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_247_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_247_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971889600)))];
+            tensor<fp16, [1280]> input_247_beta_0_to_fp16 = const()[name = tensor<string, []>("input_247_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971892224)))];
+            tensor<fp16, []> var_6814_to_fp16 = const()[name = tensor<string, []>("op_6814_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_247_cast_fp16 = layer_norm(axes = input_247_axes_0, beta = input_247_beta_0_to_fp16, epsilon = var_6814_to_fp16, gamma = input_247_gamma_0_to_fp16, x = inputs_99_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
+            tensor<string, []> input_249_pad_type_0 = const()[name = tensor<string, []>("input_249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_249_strides_0 = const()[name = tensor<string, []>("input_249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_249_pad_0 = const()[name = tensor<string, []>("input_249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_249_dilations_0 = const()[name = tensor<string, []>("input_249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_249_groups_0 = const()[name = tensor<string, []>("input_249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_24_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971894848)))];
+            tensor<fp16, [5120]> blocks_24_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985002112)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_249_cast_fp16 = conv(bias = blocks_24_mlp_0_bias_to_fp16, dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = blocks_24_mlp_0_weight_to_fp16, x = input_247_cast_fp16)[name = tensor<string, []>("input_249_cast_fp16")];
+            tensor<string, []> input_251_mode_0 = const()[name = tensor<string, []>("input_251_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_251_cast_fp16 = gelu(mode = input_251_mode_0, x = input_249_cast_fp16)[name = tensor<string, []>("input_251_cast_fp16")];
+            tensor<string, []> var_6840_pad_type_0 = const()[name = tensor<string, []>("op_6840_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6840_strides_0 = const()[name = tensor<string, []>("op_6840_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6840_pad_0 = const()[name = tensor<string, []>("op_6840_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6840_dilations_0 = const()[name = tensor<string, []>("op_6840_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6840_groups_0 = const()[name = tensor<string, []>("op_6840_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_24_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985012416)))];
+            tensor<fp16, [1280]> blocks_24_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998119680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6840_cast_fp16 = conv(bias = blocks_24_mlp_2_bias_to_fp16, dilations = var_6840_dilations_0, groups = var_6840_groups_0, pad = var_6840_pad_0, pad_type = var_6840_pad_type_0, strides = var_6840_strides_0, weight = blocks_24_mlp_2_weight_to_fp16, x = input_251_cast_fp16)[name = tensor<string, []>("op_6840_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = var_6840_cast_fp16)[name = tensor<string, []>("inputs_101_cast_fp16")];
+            tensor<int32, []> var_6849 = const()[name = tensor<string, []>("op_6849"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_253_axes_0 = const()[name = tensor<string, []>("input_253_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_253_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_253_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998122304)))];
+            tensor<fp16, [1280]> input_253_beta_0_to_fp16 = const()[name = tensor<string, []>("input_253_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998124928)))];
+            tensor<fp16, []> var_6865_to_fp16 = const()[name = tensor<string, []>("op_6865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_253_cast_fp16 = layer_norm(axes = input_253_axes_0, beta = input_253_beta_0_to_fp16, epsilon = var_6865_to_fp16, gamma = input_253_gamma_0_to_fp16, x = inputs_101_cast_fp16)[name = tensor<string, []>("input_253_cast_fp16")];
+            tensor<string, []> q_51_pad_type_0 = const()[name = tensor<string, []>("q_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_51_strides_0 = const()[name = tensor<string, []>("q_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_51_pad_0 = const()[name = tensor<string, []>("q_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_51_dilations_0 = const()[name = tensor<string, []>("q_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_51_groups_0 = const()[name = tensor<string, []>("q_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6900_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6900_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998127552)))];
+            tensor<fp16, [1280]> var_6900_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6900_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001404416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6900_cast_fp16 = conv(bias = var_6900_bias_0_to_fp16, dilations = q_51_dilations_0, groups = q_51_groups_0, pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = q_51_strides_0, weight = var_6900_weight_0_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6900_cast_fp16")];
+            tensor<string, []> k_51_pad_type_0 = const()[name = tensor<string, []>("k_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_51_strides_0 = const()[name = tensor<string, []>("k_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_51_pad_0 = const()[name = tensor<string, []>("k_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_51_dilations_0 = const()[name = tensor<string, []>("k_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_51_groups_0 = const()[name = tensor<string, []>("k_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001407040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_51_cast_fp16 = conv(dilations = k_51_dilations_0, groups = k_51_groups_0, pad = k_51_pad_0, pad_type = k_51_pad_type_0, strides = k_51_strides_0, weight = blocks_25_attn_key_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("k_51_cast_fp16")];
+            tensor<string, []> var_6898_pad_type_0 = const()[name = tensor<string, []>("op_6898_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6898_strides_0 = const()[name = tensor<string, []>("op_6898_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6898_pad_0 = const()[name = tensor<string, []>("op_6898_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6898_dilations_0 = const()[name = tensor<string, []>("op_6898_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6898_groups_0 = const()[name = tensor<string, []>("op_6898_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1004683904)))];
+            tensor<fp16, [1280]> blocks_25_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1007960768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6898_cast_fp16 = conv(bias = blocks_25_attn_value_bias_to_fp16, dilations = var_6898_dilations_0, groups = var_6898_groups_0, pad = var_6898_pad_0, pad_type = var_6898_pad_type_0, strides = var_6898_strides_0, weight = blocks_25_attn_value_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6898_cast_fp16")];
+            tensor<int32, [20]> tile_75 = const()[name = tensor<string, []>("tile_75"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6901_axis_0 = const()[name = tensor<string, []>("op_6901_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_19 = split(axis = var_6901_axis_0, split_sizes = tile_75, x = var_6900_cast_fp16)[name = tensor<string, []>("op_6901_cast_fp16")];
+            tensor<int32, [4]> var_6922_perm_0 = const()[name = tensor<string, []>("op_6922_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_76 = const()[name = tensor<string, []>("tile_76"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6923_axis_0 = const()[name = tensor<string, []>("op_6923_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6922_cast_fp16 = transpose(perm = var_6922_perm_0, x = k_51_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_19 = split(axis = var_6923_axis_0, split_sizes = tile_76, x = var_6922_cast_fp16)[name = tensor<string, []>("op_6923_cast_fp16")];
+            tensor<int32, [20]> tile_77 = const()[name = tensor<string, []>("tile_77"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6944_axis_0 = const()[name = tensor<string, []>("op_6944_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_19 = split(axis = var_6944_axis_0, split_sizes = tile_77, x = var_6898_cast_fp16)[name = tensor<string, []>("op_6944_cast_fp16")];
+            tensor<string, []> aw_1001_equation_0 = const()[name = tensor<string, []>("aw_1001_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1001_cast_fp16 = einsum(equation = aw_1001_equation_0, values = (var_6923_cast_fp16_0, var_6901_cast_fp16_0))[name = tensor<string, []>("aw_1001_cast_fp16")];
+            tensor<string, []> aw_1003_equation_0 = const()[name = tensor<string, []>("aw_1003_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1003_cast_fp16 = einsum(equation = aw_1003_equation_0, values = (var_6923_cast_fp16_1, var_6901_cast_fp16_1))[name = tensor<string, []>("aw_1003_cast_fp16")];
+            tensor<string, []> aw_1005_equation_0 = const()[name = tensor<string, []>("aw_1005_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1005_cast_fp16 = einsum(equation = aw_1005_equation_0, values = (var_6923_cast_fp16_2, var_6901_cast_fp16_2))[name = tensor<string, []>("aw_1005_cast_fp16")];
+            tensor<string, []> aw_1007_equation_0 = const()[name = tensor<string, []>("aw_1007_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1007_cast_fp16 = einsum(equation = aw_1007_equation_0, values = (var_6923_cast_fp16_3, var_6901_cast_fp16_3))[name = tensor<string, []>("aw_1007_cast_fp16")];
+            tensor<string, []> aw_1009_equation_0 = const()[name = tensor<string, []>("aw_1009_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1009_cast_fp16 = einsum(equation = aw_1009_equation_0, values = (var_6923_cast_fp16_4, var_6901_cast_fp16_4))[name = tensor<string, []>("aw_1009_cast_fp16")];
+            tensor<string, []> aw_1011_equation_0 = const()[name = tensor<string, []>("aw_1011_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1011_cast_fp16 = einsum(equation = aw_1011_equation_0, values = (var_6923_cast_fp16_5, var_6901_cast_fp16_5))[name = tensor<string, []>("aw_1011_cast_fp16")];
+            tensor<string, []> aw_1013_equation_0 = const()[name = tensor<string, []>("aw_1013_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1013_cast_fp16 = einsum(equation = aw_1013_equation_0, values = (var_6923_cast_fp16_6, var_6901_cast_fp16_6))[name = tensor<string, []>("aw_1013_cast_fp16")];
+            tensor<string, []> aw_1015_equation_0 = const()[name = tensor<string, []>("aw_1015_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1015_cast_fp16 = einsum(equation = aw_1015_equation_0, values = (var_6923_cast_fp16_7, var_6901_cast_fp16_7))[name = tensor<string, []>("aw_1015_cast_fp16")];
+            tensor<string, []> aw_1017_equation_0 = const()[name = tensor<string, []>("aw_1017_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1017_cast_fp16 = einsum(equation = aw_1017_equation_0, values = (var_6923_cast_fp16_8, var_6901_cast_fp16_8))[name = tensor<string, []>("aw_1017_cast_fp16")];
+            tensor<string, []> aw_1019_equation_0 = const()[name = tensor<string, []>("aw_1019_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1019_cast_fp16 = einsum(equation = aw_1019_equation_0, values = (var_6923_cast_fp16_9, var_6901_cast_fp16_9))[name = tensor<string, []>("aw_1019_cast_fp16")];
+            tensor<string, []> aw_1021_equation_0 = const()[name = tensor<string, []>("aw_1021_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1021_cast_fp16 = einsum(equation = aw_1021_equation_0, values = (var_6923_cast_fp16_10, var_6901_cast_fp16_10))[name = tensor<string, []>("aw_1021_cast_fp16")];
+            tensor<string, []> aw_1023_equation_0 = const()[name = tensor<string, []>("aw_1023_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1023_cast_fp16 = einsum(equation = aw_1023_equation_0, values = (var_6923_cast_fp16_11, var_6901_cast_fp16_11))[name = tensor<string, []>("aw_1023_cast_fp16")];
+            tensor<string, []> aw_1025_equation_0 = const()[name = tensor<string, []>("aw_1025_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1025_cast_fp16 = einsum(equation = aw_1025_equation_0, values = (var_6923_cast_fp16_12, var_6901_cast_fp16_12))[name = tensor<string, []>("aw_1025_cast_fp16")];
+            tensor<string, []> aw_1027_equation_0 = const()[name = tensor<string, []>("aw_1027_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1027_cast_fp16 = einsum(equation = aw_1027_equation_0, values = (var_6923_cast_fp16_13, var_6901_cast_fp16_13))[name = tensor<string, []>("aw_1027_cast_fp16")];
+            tensor<string, []> aw_1029_equation_0 = const()[name = tensor<string, []>("aw_1029_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1029_cast_fp16 = einsum(equation = aw_1029_equation_0, values = (var_6923_cast_fp16_14, var_6901_cast_fp16_14))[name = tensor<string, []>("aw_1029_cast_fp16")];
+            tensor<string, []> aw_1031_equation_0 = const()[name = tensor<string, []>("aw_1031_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1031_cast_fp16 = einsum(equation = aw_1031_equation_0, values = (var_6923_cast_fp16_15, var_6901_cast_fp16_15))[name = tensor<string, []>("aw_1031_cast_fp16")];
+            tensor<string, []> aw_1033_equation_0 = const()[name = tensor<string, []>("aw_1033_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1033_cast_fp16 = einsum(equation = aw_1033_equation_0, values = (var_6923_cast_fp16_16, var_6901_cast_fp16_16))[name = tensor<string, []>("aw_1033_cast_fp16")];
+            tensor<string, []> aw_1035_equation_0 = const()[name = tensor<string, []>("aw_1035_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1035_cast_fp16 = einsum(equation = aw_1035_equation_0, values = (var_6923_cast_fp16_17, var_6901_cast_fp16_17))[name = tensor<string, []>("aw_1035_cast_fp16")];
+            tensor<string, []> aw_1037_equation_0 = const()[name = tensor<string, []>("aw_1037_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1037_cast_fp16 = einsum(equation = aw_1037_equation_0, values = (var_6923_cast_fp16_18, var_6901_cast_fp16_18))[name = tensor<string, []>("aw_1037_cast_fp16")];
+            tensor<string, []> aw_1039_equation_0 = const()[name = tensor<string, []>("aw_1039_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1039_cast_fp16 = einsum(equation = aw_1039_equation_0, values = (var_6923_cast_fp16_19, var_6901_cast_fp16_19))[name = tensor<string, []>("aw_1039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7005_cast_fp16 = softmax(axis = var_6849, x = aw_1001_cast_fp16)[name = tensor<string, []>("op_7005_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7006_cast_fp16 = softmax(axis = var_6849, x = aw_1003_cast_fp16)[name = tensor<string, []>("op_7006_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7007_cast_fp16 = softmax(axis = var_6849, x = aw_1005_cast_fp16)[name = tensor<string, []>("op_7007_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7008_cast_fp16 = softmax(axis = var_6849, x = aw_1007_cast_fp16)[name = tensor<string, []>("op_7008_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7009_cast_fp16 = softmax(axis = var_6849, x = aw_1009_cast_fp16)[name = tensor<string, []>("op_7009_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7010_cast_fp16 = softmax(axis = var_6849, x = aw_1011_cast_fp16)[name = tensor<string, []>("op_7010_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7011_cast_fp16 = softmax(axis = var_6849, x = aw_1013_cast_fp16)[name = tensor<string, []>("op_7011_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7012_cast_fp16 = softmax(axis = var_6849, x = aw_1015_cast_fp16)[name = tensor<string, []>("op_7012_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7013_cast_fp16 = softmax(axis = var_6849, x = aw_1017_cast_fp16)[name = tensor<string, []>("op_7013_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7014_cast_fp16 = softmax(axis = var_6849, x = aw_1019_cast_fp16)[name = tensor<string, []>("op_7014_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7015_cast_fp16 = softmax(axis = var_6849, x = aw_1021_cast_fp16)[name = tensor<string, []>("op_7015_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7016_cast_fp16 = softmax(axis = var_6849, x = aw_1023_cast_fp16)[name = tensor<string, []>("op_7016_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7017_cast_fp16 = softmax(axis = var_6849, x = aw_1025_cast_fp16)[name = tensor<string, []>("op_7017_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7018_cast_fp16 = softmax(axis = var_6849, x = aw_1027_cast_fp16)[name = tensor<string, []>("op_7018_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7019_cast_fp16 = softmax(axis = var_6849, x = aw_1029_cast_fp16)[name = tensor<string, []>("op_7019_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7020_cast_fp16 = softmax(axis = var_6849, x = aw_1031_cast_fp16)[name = tensor<string, []>("op_7020_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7021_cast_fp16 = softmax(axis = var_6849, x = aw_1033_cast_fp16)[name = tensor<string, []>("op_7021_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7022_cast_fp16 = softmax(axis = var_6849, x = aw_1035_cast_fp16)[name = tensor<string, []>("op_7022_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7023_cast_fp16 = softmax(axis = var_6849, x = aw_1037_cast_fp16)[name = tensor<string, []>("op_7023_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7024_cast_fp16 = softmax(axis = var_6849, x = aw_1039_cast_fp16)[name = tensor<string, []>("op_7024_cast_fp16")];
+            tensor<string, []> var_7026_equation_0 = const()[name = tensor<string, []>("op_7026_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7026_cast_fp16 = einsum(equation = var_7026_equation_0, values = (var_6944_cast_fp16_0, var_7005_cast_fp16))[name = tensor<string, []>("op_7026_cast_fp16")];
+            tensor<string, []> var_7028_equation_0 = const()[name = tensor<string, []>("op_7028_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7028_cast_fp16 = einsum(equation = var_7028_equation_0, values = (var_6944_cast_fp16_1, var_7006_cast_fp16))[name = tensor<string, []>("op_7028_cast_fp16")];
+            tensor<string, []> var_7030_equation_0 = const()[name = tensor<string, []>("op_7030_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7030_cast_fp16 = einsum(equation = var_7030_equation_0, values = (var_6944_cast_fp16_2, var_7007_cast_fp16))[name = tensor<string, []>("op_7030_cast_fp16")];
+            tensor<string, []> var_7032_equation_0 = const()[name = tensor<string, []>("op_7032_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7032_cast_fp16 = einsum(equation = var_7032_equation_0, values = (var_6944_cast_fp16_3, var_7008_cast_fp16))[name = tensor<string, []>("op_7032_cast_fp16")];
+            tensor<string, []> var_7034_equation_0 = const()[name = tensor<string, []>("op_7034_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7034_cast_fp16 = einsum(equation = var_7034_equation_0, values = (var_6944_cast_fp16_4, var_7009_cast_fp16))[name = tensor<string, []>("op_7034_cast_fp16")];
+            tensor<string, []> var_7036_equation_0 = const()[name = tensor<string, []>("op_7036_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7036_cast_fp16 = einsum(equation = var_7036_equation_0, values = (var_6944_cast_fp16_5, var_7010_cast_fp16))[name = tensor<string, []>("op_7036_cast_fp16")];
+            tensor<string, []> var_7038_equation_0 = const()[name = tensor<string, []>("op_7038_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7038_cast_fp16 = einsum(equation = var_7038_equation_0, values = (var_6944_cast_fp16_6, var_7011_cast_fp16))[name = tensor<string, []>("op_7038_cast_fp16")];
+            tensor<string, []> var_7040_equation_0 = const()[name = tensor<string, []>("op_7040_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7040_cast_fp16 = einsum(equation = var_7040_equation_0, values = (var_6944_cast_fp16_7, var_7012_cast_fp16))[name = tensor<string, []>("op_7040_cast_fp16")];
+            tensor<string, []> var_7042_equation_0 = const()[name = tensor<string, []>("op_7042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7042_cast_fp16 = einsum(equation = var_7042_equation_0, values = (var_6944_cast_fp16_8, var_7013_cast_fp16))[name = tensor<string, []>("op_7042_cast_fp16")];
+            tensor<string, []> var_7044_equation_0 = const()[name = tensor<string, []>("op_7044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7044_cast_fp16 = einsum(equation = var_7044_equation_0, values = (var_6944_cast_fp16_9, var_7014_cast_fp16))[name = tensor<string, []>("op_7044_cast_fp16")];
+            tensor<string, []> var_7046_equation_0 = const()[name = tensor<string, []>("op_7046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7046_cast_fp16 = einsum(equation = var_7046_equation_0, values = (var_6944_cast_fp16_10, var_7015_cast_fp16))[name = tensor<string, []>("op_7046_cast_fp16")];
+            tensor<string, []> var_7048_equation_0 = const()[name = tensor<string, []>("op_7048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7048_cast_fp16 = einsum(equation = var_7048_equation_0, values = (var_6944_cast_fp16_11, var_7016_cast_fp16))[name = tensor<string, []>("op_7048_cast_fp16")];
+            tensor<string, []> var_7050_equation_0 = const()[name = tensor<string, []>("op_7050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7050_cast_fp16 = einsum(equation = var_7050_equation_0, values = (var_6944_cast_fp16_12, var_7017_cast_fp16))[name = tensor<string, []>("op_7050_cast_fp16")];
+            tensor<string, []> var_7052_equation_0 = const()[name = tensor<string, []>("op_7052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7052_cast_fp16 = einsum(equation = var_7052_equation_0, values = (var_6944_cast_fp16_13, var_7018_cast_fp16))[name = tensor<string, []>("op_7052_cast_fp16")];
+            tensor<string, []> var_7054_equation_0 = const()[name = tensor<string, []>("op_7054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7054_cast_fp16 = einsum(equation = var_7054_equation_0, values = (var_6944_cast_fp16_14, var_7019_cast_fp16))[name = tensor<string, []>("op_7054_cast_fp16")];
+            tensor<string, []> var_7056_equation_0 = const()[name = tensor<string, []>("op_7056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7056_cast_fp16 = einsum(equation = var_7056_equation_0, values = (var_6944_cast_fp16_15, var_7020_cast_fp16))[name = tensor<string, []>("op_7056_cast_fp16")];
+            tensor<string, []> var_7058_equation_0 = const()[name = tensor<string, []>("op_7058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7058_cast_fp16 = einsum(equation = var_7058_equation_0, values = (var_6944_cast_fp16_16, var_7021_cast_fp16))[name = tensor<string, []>("op_7058_cast_fp16")];
+            tensor<string, []> var_7060_equation_0 = const()[name = tensor<string, []>("op_7060_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7060_cast_fp16 = einsum(equation = var_7060_equation_0, values = (var_6944_cast_fp16_17, var_7022_cast_fp16))[name = tensor<string, []>("op_7060_cast_fp16")];
+            tensor<string, []> var_7062_equation_0 = const()[name = tensor<string, []>("op_7062_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7062_cast_fp16 = einsum(equation = var_7062_equation_0, values = (var_6944_cast_fp16_18, var_7023_cast_fp16))[name = tensor<string, []>("op_7062_cast_fp16")];
+            tensor<string, []> var_7064_equation_0 = const()[name = tensor<string, []>("op_7064_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7064_cast_fp16 = einsum(equation = var_7064_equation_0, values = (var_6944_cast_fp16_19, var_7024_cast_fp16))[name = tensor<string, []>("op_7064_cast_fp16")];
+            tensor<bool, []> input_255_interleave_0 = const()[name = tensor<string, []>("input_255_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_255_cast_fp16 = concat(axis = var_6849, interleave = input_255_interleave_0, values = (var_7026_cast_fp16, var_7028_cast_fp16, var_7030_cast_fp16, var_7032_cast_fp16, var_7034_cast_fp16, var_7036_cast_fp16, var_7038_cast_fp16, var_7040_cast_fp16, var_7042_cast_fp16, var_7044_cast_fp16, var_7046_cast_fp16, var_7048_cast_fp16, var_7050_cast_fp16, var_7052_cast_fp16, var_7054_cast_fp16, var_7056_cast_fp16, var_7058_cast_fp16, var_7060_cast_fp16, var_7062_cast_fp16, var_7064_cast_fp16))[name = tensor<string, []>("input_255_cast_fp16")];
+            tensor<string, []> var_7073_pad_type_0 = const()[name = tensor<string, []>("op_7073_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7073_strides_0 = const()[name = tensor<string, []>("op_7073_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7073_pad_0 = const()[name = tensor<string, []>("op_7073_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7073_dilations_0 = const()[name = tensor<string, []>("op_7073_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7073_groups_0 = const()[name = tensor<string, []>("op_7073_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1007963392)))];
+            tensor<fp16, [1280]> blocks_25_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011240256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7073_cast_fp16 = conv(bias = blocks_25_attn_out_bias_to_fp16, dilations = var_7073_dilations_0, groups = var_7073_groups_0, pad = var_7073_pad_0, pad_type = var_7073_pad_type_0, strides = var_7073_strides_0, weight = blocks_25_attn_out_weight_to_fp16, x = input_255_cast_fp16)[name = tensor<string, []>("op_7073_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = var_7073_cast_fp16)[name = tensor<string, []>("inputs_103_cast_fp16")];
+            tensor<int32, [1]> input_257_axes_0 = const()[name = tensor<string, []>("input_257_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_257_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_257_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011242880)))];
+            tensor<fp16, [1280]> input_257_beta_0_to_fp16 = const()[name = tensor<string, []>("input_257_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011245504)))];
+            tensor<fp16, []> var_7083_to_fp16 = const()[name = tensor<string, []>("op_7083_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_257_cast_fp16 = layer_norm(axes = input_257_axes_0, beta = input_257_beta_0_to_fp16, epsilon = var_7083_to_fp16, gamma = input_257_gamma_0_to_fp16, x = inputs_103_cast_fp16)[name = tensor<string, []>("input_257_cast_fp16")];
+            tensor<string, []> input_259_pad_type_0 = const()[name = tensor<string, []>("input_259_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_259_strides_0 = const()[name = tensor<string, []>("input_259_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_259_pad_0 = const()[name = tensor<string, []>("input_259_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_259_dilations_0 = const()[name = tensor<string, []>("input_259_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_259_groups_0 = const()[name = tensor<string, []>("input_259_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_25_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011248128)))];
+            tensor<fp16, [5120]> blocks_25_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024355392)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_259_cast_fp16 = conv(bias = blocks_25_mlp_0_bias_to_fp16, dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = blocks_25_mlp_0_weight_to_fp16, x = input_257_cast_fp16)[name = tensor<string, []>("input_259_cast_fp16")];
+            tensor<string, []> input_261_mode_0 = const()[name = tensor<string, []>("input_261_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_261_cast_fp16 = gelu(mode = input_261_mode_0, x = input_259_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
+            tensor<string, []> var_7109_pad_type_0 = const()[name = tensor<string, []>("op_7109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7109_strides_0 = const()[name = tensor<string, []>("op_7109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7109_pad_0 = const()[name = tensor<string, []>("op_7109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7109_dilations_0 = const()[name = tensor<string, []>("op_7109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7109_groups_0 = const()[name = tensor<string, []>("op_7109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_25_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024365696)))];
+            tensor<fp16, [1280]> blocks_25_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037472960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7109_cast_fp16 = conv(bias = blocks_25_mlp_2_bias_to_fp16, dilations = var_7109_dilations_0, groups = var_7109_groups_0, pad = var_7109_pad_0, pad_type = var_7109_pad_type_0, strides = var_7109_strides_0, weight = blocks_25_mlp_2_weight_to_fp16, x = input_261_cast_fp16)[name = tensor<string, []>("op_7109_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = var_7109_cast_fp16)[name = tensor<string, []>("inputs_105_cast_fp16")];
+            tensor<int32, []> var_7118 = const()[name = tensor<string, []>("op_7118"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_263_axes_0 = const()[name = tensor<string, []>("input_263_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_263_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_263_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037475584)))];
+            tensor<fp16, [1280]> input_263_beta_0_to_fp16 = const()[name = tensor<string, []>("input_263_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037478208)))];
+            tensor<fp16, []> var_7134_to_fp16 = const()[name = tensor<string, []>("op_7134_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_263_cast_fp16 = layer_norm(axes = input_263_axes_0, beta = input_263_beta_0_to_fp16, epsilon = var_7134_to_fp16, gamma = input_263_gamma_0_to_fp16, x = inputs_105_cast_fp16)[name = tensor<string, []>("input_263_cast_fp16")];
+            tensor<string, []> q_53_pad_type_0 = const()[name = tensor<string, []>("q_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_53_strides_0 = const()[name = tensor<string, []>("q_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_53_pad_0 = const()[name = tensor<string, []>("q_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_53_dilations_0 = const()[name = tensor<string, []>("q_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_53_groups_0 = const()[name = tensor<string, []>("q_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7169_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7169_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037480832)))];
+            tensor<fp16, [1280]> var_7169_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7169_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1040757696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7169_cast_fp16 = conv(bias = var_7169_bias_0_to_fp16, dilations = q_53_dilations_0, groups = q_53_groups_0, pad = q_53_pad_0, pad_type = q_53_pad_type_0, strides = q_53_strides_0, weight = var_7169_weight_0_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7169_cast_fp16")];
+            tensor<string, []> k_53_pad_type_0 = const()[name = tensor<string, []>("k_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_53_strides_0 = const()[name = tensor<string, []>("k_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_53_pad_0 = const()[name = tensor<string, []>("k_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_53_dilations_0 = const()[name = tensor<string, []>("k_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_53_groups_0 = const()[name = tensor<string, []>("k_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1040760320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_53_cast_fp16 = conv(dilations = k_53_dilations_0, groups = k_53_groups_0, pad = k_53_pad_0, pad_type = k_53_pad_type_0, strides = k_53_strides_0, weight = blocks_26_attn_key_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("k_53_cast_fp16")];
+            tensor<string, []> var_7167_pad_type_0 = const()[name = tensor<string, []>("op_7167_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7167_strides_0 = const()[name = tensor<string, []>("op_7167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7167_pad_0 = const()[name = tensor<string, []>("op_7167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7167_dilations_0 = const()[name = tensor<string, []>("op_7167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7167_groups_0 = const()[name = tensor<string, []>("op_7167_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1044037184)))];
+            tensor<fp16, [1280]> blocks_26_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047314048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7167_cast_fp16 = conv(bias = blocks_26_attn_value_bias_to_fp16, dilations = var_7167_dilations_0, groups = var_7167_groups_0, pad = var_7167_pad_0, pad_type = var_7167_pad_type_0, strides = var_7167_strides_0, weight = blocks_26_attn_value_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7167_cast_fp16")];
+            tensor<int32, [20]> tile_78 = const()[name = tensor<string, []>("tile_78"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7170_axis_0 = const()[name = tensor<string, []>("op_7170_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_19 = split(axis = var_7170_axis_0, split_sizes = tile_78, x = var_7169_cast_fp16)[name = tensor<string, []>("op_7170_cast_fp16")];
+            tensor<int32, [4]> var_7191_perm_0 = const()[name = tensor<string, []>("op_7191_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_79 = const()[name = tensor<string, []>("tile_79"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7192_axis_0 = const()[name = tensor<string, []>("op_7192_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7191_cast_fp16 = transpose(perm = var_7191_perm_0, x = k_53_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_19 = split(axis = var_7192_axis_0, split_sizes = tile_79, x = var_7191_cast_fp16)[name = tensor<string, []>("op_7192_cast_fp16")];
+            tensor<int32, [20]> tile_80 = const()[name = tensor<string, []>("tile_80"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7213_axis_0 = const()[name = tensor<string, []>("op_7213_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_19 = split(axis = var_7213_axis_0, split_sizes = tile_80, x = var_7167_cast_fp16)[name = tensor<string, []>("op_7213_cast_fp16")];
+            tensor<string, []> aw_1041_equation_0 = const()[name = tensor<string, []>("aw_1041_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1041_cast_fp16 = einsum(equation = aw_1041_equation_0, values = (var_7192_cast_fp16_0, var_7170_cast_fp16_0))[name = tensor<string, []>("aw_1041_cast_fp16")];
+            tensor<string, []> aw_1043_equation_0 = const()[name = tensor<string, []>("aw_1043_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1043_cast_fp16 = einsum(equation = aw_1043_equation_0, values = (var_7192_cast_fp16_1, var_7170_cast_fp16_1))[name = tensor<string, []>("aw_1043_cast_fp16")];
+            tensor<string, []> aw_1045_equation_0 = const()[name = tensor<string, []>("aw_1045_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1045_cast_fp16 = einsum(equation = aw_1045_equation_0, values = (var_7192_cast_fp16_2, var_7170_cast_fp16_2))[name = tensor<string, []>("aw_1045_cast_fp16")];
+            tensor<string, []> aw_1047_equation_0 = const()[name = tensor<string, []>("aw_1047_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1047_cast_fp16 = einsum(equation = aw_1047_equation_0, values = (var_7192_cast_fp16_3, var_7170_cast_fp16_3))[name = tensor<string, []>("aw_1047_cast_fp16")];
+            tensor<string, []> aw_1049_equation_0 = const()[name = tensor<string, []>("aw_1049_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1049_cast_fp16 = einsum(equation = aw_1049_equation_0, values = (var_7192_cast_fp16_4, var_7170_cast_fp16_4))[name = tensor<string, []>("aw_1049_cast_fp16")];
+            tensor<string, []> aw_1051_equation_0 = const()[name = tensor<string, []>("aw_1051_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1051_cast_fp16 = einsum(equation = aw_1051_equation_0, values = (var_7192_cast_fp16_5, var_7170_cast_fp16_5))[name = tensor<string, []>("aw_1051_cast_fp16")];
+            tensor<string, []> aw_1053_equation_0 = const()[name = tensor<string, []>("aw_1053_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1053_cast_fp16 = einsum(equation = aw_1053_equation_0, values = (var_7192_cast_fp16_6, var_7170_cast_fp16_6))[name = tensor<string, []>("aw_1053_cast_fp16")];
+            tensor<string, []> aw_1055_equation_0 = const()[name = tensor<string, []>("aw_1055_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1055_cast_fp16 = einsum(equation = aw_1055_equation_0, values = (var_7192_cast_fp16_7, var_7170_cast_fp16_7))[name = tensor<string, []>("aw_1055_cast_fp16")];
+            tensor<string, []> aw_1057_equation_0 = const()[name = tensor<string, []>("aw_1057_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1057_cast_fp16 = einsum(equation = aw_1057_equation_0, values = (var_7192_cast_fp16_8, var_7170_cast_fp16_8))[name = tensor<string, []>("aw_1057_cast_fp16")];
+            tensor<string, []> aw_1059_equation_0 = const()[name = tensor<string, []>("aw_1059_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1059_cast_fp16 = einsum(equation = aw_1059_equation_0, values = (var_7192_cast_fp16_9, var_7170_cast_fp16_9))[name = tensor<string, []>("aw_1059_cast_fp16")];
+            tensor<string, []> aw_1061_equation_0 = const()[name = tensor<string, []>("aw_1061_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1061_cast_fp16 = einsum(equation = aw_1061_equation_0, values = (var_7192_cast_fp16_10, var_7170_cast_fp16_10))[name = tensor<string, []>("aw_1061_cast_fp16")];
+            tensor<string, []> aw_1063_equation_0 = const()[name = tensor<string, []>("aw_1063_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1063_cast_fp16 = einsum(equation = aw_1063_equation_0, values = (var_7192_cast_fp16_11, var_7170_cast_fp16_11))[name = tensor<string, []>("aw_1063_cast_fp16")];
+            tensor<string, []> aw_1065_equation_0 = const()[name = tensor<string, []>("aw_1065_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1065_cast_fp16 = einsum(equation = aw_1065_equation_0, values = (var_7192_cast_fp16_12, var_7170_cast_fp16_12))[name = tensor<string, []>("aw_1065_cast_fp16")];
+            tensor<string, []> aw_1067_equation_0 = const()[name = tensor<string, []>("aw_1067_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1067_cast_fp16 = einsum(equation = aw_1067_equation_0, values = (var_7192_cast_fp16_13, var_7170_cast_fp16_13))[name = tensor<string, []>("aw_1067_cast_fp16")];
+            tensor<string, []> aw_1069_equation_0 = const()[name = tensor<string, []>("aw_1069_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1069_cast_fp16 = einsum(equation = aw_1069_equation_0, values = (var_7192_cast_fp16_14, var_7170_cast_fp16_14))[name = tensor<string, []>("aw_1069_cast_fp16")];
+            tensor<string, []> aw_1071_equation_0 = const()[name = tensor<string, []>("aw_1071_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1071_cast_fp16 = einsum(equation = aw_1071_equation_0, values = (var_7192_cast_fp16_15, var_7170_cast_fp16_15))[name = tensor<string, []>("aw_1071_cast_fp16")];
+            tensor<string, []> aw_1073_equation_0 = const()[name = tensor<string, []>("aw_1073_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1073_cast_fp16 = einsum(equation = aw_1073_equation_0, values = (var_7192_cast_fp16_16, var_7170_cast_fp16_16))[name = tensor<string, []>("aw_1073_cast_fp16")];
+            tensor<string, []> aw_1075_equation_0 = const()[name = tensor<string, []>("aw_1075_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1075_cast_fp16 = einsum(equation = aw_1075_equation_0, values = (var_7192_cast_fp16_17, var_7170_cast_fp16_17))[name = tensor<string, []>("aw_1075_cast_fp16")];
+            tensor<string, []> aw_1077_equation_0 = const()[name = tensor<string, []>("aw_1077_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1077_cast_fp16 = einsum(equation = aw_1077_equation_0, values = (var_7192_cast_fp16_18, var_7170_cast_fp16_18))[name = tensor<string, []>("aw_1077_cast_fp16")];
+            tensor<string, []> aw_1079_equation_0 = const()[name = tensor<string, []>("aw_1079_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1079_cast_fp16 = einsum(equation = aw_1079_equation_0, values = (var_7192_cast_fp16_19, var_7170_cast_fp16_19))[name = tensor<string, []>("aw_1079_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7274_cast_fp16 = softmax(axis = var_7118, x = aw_1041_cast_fp16)[name = tensor<string, []>("op_7274_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7275_cast_fp16 = softmax(axis = var_7118, x = aw_1043_cast_fp16)[name = tensor<string, []>("op_7275_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7276_cast_fp16 = softmax(axis = var_7118, x = aw_1045_cast_fp16)[name = tensor<string, []>("op_7276_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7277_cast_fp16 = softmax(axis = var_7118, x = aw_1047_cast_fp16)[name = tensor<string, []>("op_7277_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7278_cast_fp16 = softmax(axis = var_7118, x = aw_1049_cast_fp16)[name = tensor<string, []>("op_7278_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7279_cast_fp16 = softmax(axis = var_7118, x = aw_1051_cast_fp16)[name = tensor<string, []>("op_7279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7280_cast_fp16 = softmax(axis = var_7118, x = aw_1053_cast_fp16)[name = tensor<string, []>("op_7280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7281_cast_fp16 = softmax(axis = var_7118, x = aw_1055_cast_fp16)[name = tensor<string, []>("op_7281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7282_cast_fp16 = softmax(axis = var_7118, x = aw_1057_cast_fp16)[name = tensor<string, []>("op_7282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7283_cast_fp16 = softmax(axis = var_7118, x = aw_1059_cast_fp16)[name = tensor<string, []>("op_7283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7284_cast_fp16 = softmax(axis = var_7118, x = aw_1061_cast_fp16)[name = tensor<string, []>("op_7284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7285_cast_fp16 = softmax(axis = var_7118, x = aw_1063_cast_fp16)[name = tensor<string, []>("op_7285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7286_cast_fp16 = softmax(axis = var_7118, x = aw_1065_cast_fp16)[name = tensor<string, []>("op_7286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7287_cast_fp16 = softmax(axis = var_7118, x = aw_1067_cast_fp16)[name = tensor<string, []>("op_7287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7288_cast_fp16 = softmax(axis = var_7118, x = aw_1069_cast_fp16)[name = tensor<string, []>("op_7288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7289_cast_fp16 = softmax(axis = var_7118, x = aw_1071_cast_fp16)[name = tensor<string, []>("op_7289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7290_cast_fp16 = softmax(axis = var_7118, x = aw_1073_cast_fp16)[name = tensor<string, []>("op_7290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7291_cast_fp16 = softmax(axis = var_7118, x = aw_1075_cast_fp16)[name = tensor<string, []>("op_7291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7292_cast_fp16 = softmax(axis = var_7118, x = aw_1077_cast_fp16)[name = tensor<string, []>("op_7292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7293_cast_fp16 = softmax(axis = var_7118, x = aw_1079_cast_fp16)[name = tensor<string, []>("op_7293_cast_fp16")];
+            tensor<string, []> var_7295_equation_0 = const()[name = tensor<string, []>("op_7295_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7295_cast_fp16 = einsum(equation = var_7295_equation_0, values = (var_7213_cast_fp16_0, var_7274_cast_fp16))[name = tensor<string, []>("op_7295_cast_fp16")];
+            tensor<string, []> var_7297_equation_0 = const()[name = tensor<string, []>("op_7297_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7297_cast_fp16 = einsum(equation = var_7297_equation_0, values = (var_7213_cast_fp16_1, var_7275_cast_fp16))[name = tensor<string, []>("op_7297_cast_fp16")];
+            tensor<string, []> var_7299_equation_0 = const()[name = tensor<string, []>("op_7299_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7299_cast_fp16 = einsum(equation = var_7299_equation_0, values = (var_7213_cast_fp16_2, var_7276_cast_fp16))[name = tensor<string, []>("op_7299_cast_fp16")];
+            tensor<string, []> var_7301_equation_0 = const()[name = tensor<string, []>("op_7301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7301_cast_fp16 = einsum(equation = var_7301_equation_0, values = (var_7213_cast_fp16_3, var_7277_cast_fp16))[name = tensor<string, []>("op_7301_cast_fp16")];
+            tensor<string, []> var_7303_equation_0 = const()[name = tensor<string, []>("op_7303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7303_cast_fp16 = einsum(equation = var_7303_equation_0, values = (var_7213_cast_fp16_4, var_7278_cast_fp16))[name = tensor<string, []>("op_7303_cast_fp16")];
+            tensor<string, []> var_7305_equation_0 = const()[name = tensor<string, []>("op_7305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7305_cast_fp16 = einsum(equation = var_7305_equation_0, values = (var_7213_cast_fp16_5, var_7279_cast_fp16))[name = tensor<string, []>("op_7305_cast_fp16")];
+            tensor<string, []> var_7307_equation_0 = const()[name = tensor<string, []>("op_7307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7307_cast_fp16 = einsum(equation = var_7307_equation_0, values = (var_7213_cast_fp16_6, var_7280_cast_fp16))[name = tensor<string, []>("op_7307_cast_fp16")];
+            tensor<string, []> var_7309_equation_0 = const()[name = tensor<string, []>("op_7309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7309_cast_fp16 = einsum(equation = var_7309_equation_0, values = (var_7213_cast_fp16_7, var_7281_cast_fp16))[name = tensor<string, []>("op_7309_cast_fp16")];
+            tensor<string, []> var_7311_equation_0 = const()[name = tensor<string, []>("op_7311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7311_cast_fp16 = einsum(equation = var_7311_equation_0, values = (var_7213_cast_fp16_8, var_7282_cast_fp16))[name = tensor<string, []>("op_7311_cast_fp16")];
+            tensor<string, []> var_7313_equation_0 = const()[name = tensor<string, []>("op_7313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7313_cast_fp16 = einsum(equation = var_7313_equation_0, values = (var_7213_cast_fp16_9, var_7283_cast_fp16))[name = tensor<string, []>("op_7313_cast_fp16")];
+            tensor<string, []> var_7315_equation_0 = const()[name = tensor<string, []>("op_7315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7315_cast_fp16 = einsum(equation = var_7315_equation_0, values = (var_7213_cast_fp16_10, var_7284_cast_fp16))[name = tensor<string, []>("op_7315_cast_fp16")];
+            tensor<string, []> var_7317_equation_0 = const()[name = tensor<string, []>("op_7317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7317_cast_fp16 = einsum(equation = var_7317_equation_0, values = (var_7213_cast_fp16_11, var_7285_cast_fp16))[name = tensor<string, []>("op_7317_cast_fp16")];
+            tensor<string, []> var_7319_equation_0 = const()[name = tensor<string, []>("op_7319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7319_cast_fp16 = einsum(equation = var_7319_equation_0, values = (var_7213_cast_fp16_12, var_7286_cast_fp16))[name = tensor<string, []>("op_7319_cast_fp16")];
+            tensor<string, []> var_7321_equation_0 = const()[name = tensor<string, []>("op_7321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7321_cast_fp16 = einsum(equation = var_7321_equation_0, values = (var_7213_cast_fp16_13, var_7287_cast_fp16))[name = tensor<string, []>("op_7321_cast_fp16")];
+            tensor<string, []> var_7323_equation_0 = const()[name = tensor<string, []>("op_7323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7323_cast_fp16 = einsum(equation = var_7323_equation_0, values = (var_7213_cast_fp16_14, var_7288_cast_fp16))[name = tensor<string, []>("op_7323_cast_fp16")];
+            tensor<string, []> var_7325_equation_0 = const()[name = tensor<string, []>("op_7325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7325_cast_fp16 = einsum(equation = var_7325_equation_0, values = (var_7213_cast_fp16_15, var_7289_cast_fp16))[name = tensor<string, []>("op_7325_cast_fp16")];
+            tensor<string, []> var_7327_equation_0 = const()[name = tensor<string, []>("op_7327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7327_cast_fp16 = einsum(equation = var_7327_equation_0, values = (var_7213_cast_fp16_16, var_7290_cast_fp16))[name = tensor<string, []>("op_7327_cast_fp16")];
+            tensor<string, []> var_7329_equation_0 = const()[name = tensor<string, []>("op_7329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7329_cast_fp16 = einsum(equation = var_7329_equation_0, values = (var_7213_cast_fp16_17, var_7291_cast_fp16))[name = tensor<string, []>("op_7329_cast_fp16")];
+            tensor<string, []> var_7331_equation_0 = const()[name = tensor<string, []>("op_7331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7331_cast_fp16 = einsum(equation = var_7331_equation_0, values = (var_7213_cast_fp16_18, var_7292_cast_fp16))[name = tensor<string, []>("op_7331_cast_fp16")];
+            tensor<string, []> var_7333_equation_0 = const()[name = tensor<string, []>("op_7333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7333_cast_fp16 = einsum(equation = var_7333_equation_0, values = (var_7213_cast_fp16_19, var_7293_cast_fp16))[name = tensor<string, []>("op_7333_cast_fp16")];
+            tensor<bool, []> input_265_interleave_0 = const()[name = tensor<string, []>("input_265_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_265_cast_fp16 = concat(axis = var_7118, interleave = input_265_interleave_0, values = (var_7295_cast_fp16, var_7297_cast_fp16, var_7299_cast_fp16, var_7301_cast_fp16, var_7303_cast_fp16, var_7305_cast_fp16, var_7307_cast_fp16, var_7309_cast_fp16, var_7311_cast_fp16, var_7313_cast_fp16, var_7315_cast_fp16, var_7317_cast_fp16, var_7319_cast_fp16, var_7321_cast_fp16, var_7323_cast_fp16, var_7325_cast_fp16, var_7327_cast_fp16, var_7329_cast_fp16, var_7331_cast_fp16, var_7333_cast_fp16))[name = tensor<string, []>("input_265_cast_fp16")];
+            tensor<string, []> var_7342_pad_type_0 = const()[name = tensor<string, []>("op_7342_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7342_strides_0 = const()[name = tensor<string, []>("op_7342_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7342_pad_0 = const()[name = tensor<string, []>("op_7342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7342_dilations_0 = const()[name = tensor<string, []>("op_7342_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7342_groups_0 = const()[name = tensor<string, []>("op_7342_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047316672)))];
+            tensor<fp16, [1280]> blocks_26_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050593536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7342_cast_fp16 = conv(bias = blocks_26_attn_out_bias_to_fp16, dilations = var_7342_dilations_0, groups = var_7342_groups_0, pad = var_7342_pad_0, pad_type = var_7342_pad_type_0, strides = var_7342_strides_0, weight = blocks_26_attn_out_weight_to_fp16, x = input_265_cast_fp16)[name = tensor<string, []>("op_7342_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = var_7342_cast_fp16)[name = tensor<string, []>("inputs_107_cast_fp16")];
+            tensor<int32, [1]> input_267_axes_0 = const()[name = tensor<string, []>("input_267_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_267_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_267_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050596160)))];
+            tensor<fp16, [1280]> input_267_beta_0_to_fp16 = const()[name = tensor<string, []>("input_267_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050598784)))];
+            tensor<fp16, []> var_7352_to_fp16 = const()[name = tensor<string, []>("op_7352_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_267_cast_fp16 = layer_norm(axes = input_267_axes_0, beta = input_267_beta_0_to_fp16, epsilon = var_7352_to_fp16, gamma = input_267_gamma_0_to_fp16, x = inputs_107_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
+            tensor<string, []> input_269_pad_type_0 = const()[name = tensor<string, []>("input_269_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_269_strides_0 = const()[name = tensor<string, []>("input_269_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_269_pad_0 = const()[name = tensor<string, []>("input_269_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_269_dilations_0 = const()[name = tensor<string, []>("input_269_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_269_groups_0 = const()[name = tensor<string, []>("input_269_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_26_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050601408)))];
+            tensor<fp16, [5120]> blocks_26_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1063708672)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_269_cast_fp16 = conv(bias = blocks_26_mlp_0_bias_to_fp16, dilations = input_269_dilations_0, groups = input_269_groups_0, pad = input_269_pad_0, pad_type = input_269_pad_type_0, strides = input_269_strides_0, weight = blocks_26_mlp_0_weight_to_fp16, x = input_267_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
+            tensor<string, []> input_271_mode_0 = const()[name = tensor<string, []>("input_271_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_271_cast_fp16 = gelu(mode = input_271_mode_0, x = input_269_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
+            tensor<string, []> var_7378_pad_type_0 = const()[name = tensor<string, []>("op_7378_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7378_strides_0 = const()[name = tensor<string, []>("op_7378_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7378_pad_0 = const()[name = tensor<string, []>("op_7378_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7378_dilations_0 = const()[name = tensor<string, []>("op_7378_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7378_groups_0 = const()[name = tensor<string, []>("op_7378_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_26_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1063718976)))];
+            tensor<fp16, [1280]> blocks_26_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076826240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7378_cast_fp16 = conv(bias = blocks_26_mlp_2_bias_to_fp16, dilations = var_7378_dilations_0, groups = var_7378_groups_0, pad = var_7378_pad_0, pad_type = var_7378_pad_type_0, strides = var_7378_strides_0, weight = blocks_26_mlp_2_weight_to_fp16, x = input_271_cast_fp16)[name = tensor<string, []>("op_7378_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = var_7378_cast_fp16)[name = tensor<string, []>("inputs_109_cast_fp16")];
+            tensor<int32, []> var_7387 = const()[name = tensor<string, []>("op_7387"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_273_axes_0 = const()[name = tensor<string, []>("input_273_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_273_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_273_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076828864)))];
+            tensor<fp16, [1280]> input_273_beta_0_to_fp16 = const()[name = tensor<string, []>("input_273_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076831488)))];
+            tensor<fp16, []> var_7403_to_fp16 = const()[name = tensor<string, []>("op_7403_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_273_cast_fp16 = layer_norm(axes = input_273_axes_0, beta = input_273_beta_0_to_fp16, epsilon = var_7403_to_fp16, gamma = input_273_gamma_0_to_fp16, x = inputs_109_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
+            tensor<string, []> q_55_pad_type_0 = const()[name = tensor<string, []>("q_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_55_strides_0 = const()[name = tensor<string, []>("q_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_55_pad_0 = const()[name = tensor<string, []>("q_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_55_dilations_0 = const()[name = tensor<string, []>("q_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_55_groups_0 = const()[name = tensor<string, []>("q_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7438_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7438_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076834112)))];
+            tensor<fp16, [1280]> var_7438_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7438_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080110976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7438_cast_fp16 = conv(bias = var_7438_bias_0_to_fp16, dilations = q_55_dilations_0, groups = q_55_groups_0, pad = q_55_pad_0, pad_type = q_55_pad_type_0, strides = q_55_strides_0, weight = var_7438_weight_0_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7438_cast_fp16")];
+            tensor<string, []> k_55_pad_type_0 = const()[name = tensor<string, []>("k_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_55_strides_0 = const()[name = tensor<string, []>("k_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_55_pad_0 = const()[name = tensor<string, []>("k_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_55_dilations_0 = const()[name = tensor<string, []>("k_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_55_groups_0 = const()[name = tensor<string, []>("k_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080113600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_55_cast_fp16 = conv(dilations = k_55_dilations_0, groups = k_55_groups_0, pad = k_55_pad_0, pad_type = k_55_pad_type_0, strides = k_55_strides_0, weight = blocks_27_attn_key_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("k_55_cast_fp16")];
+            tensor<string, []> var_7436_pad_type_0 = const()[name = tensor<string, []>("op_7436_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7436_strides_0 = const()[name = tensor<string, []>("op_7436_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7436_pad_0 = const()[name = tensor<string, []>("op_7436_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7436_dilations_0 = const()[name = tensor<string, []>("op_7436_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7436_groups_0 = const()[name = tensor<string, []>("op_7436_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1083390464)))];
+            tensor<fp16, [1280]> blocks_27_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1086667328)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7436_cast_fp16 = conv(bias = blocks_27_attn_value_bias_to_fp16, dilations = var_7436_dilations_0, groups = var_7436_groups_0, pad = var_7436_pad_0, pad_type = var_7436_pad_type_0, strides = var_7436_strides_0, weight = blocks_27_attn_value_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7436_cast_fp16")];
+            tensor<int32, [20]> tile_81 = const()[name = tensor<string, []>("tile_81"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7439_axis_0 = const()[name = tensor<string, []>("op_7439_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_19 = split(axis = var_7439_axis_0, split_sizes = tile_81, x = var_7438_cast_fp16)[name = tensor<string, []>("op_7439_cast_fp16")];
+            tensor<int32, [4]> var_7460_perm_0 = const()[name = tensor<string, []>("op_7460_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_82 = const()[name = tensor<string, []>("tile_82"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7461_axis_0 = const()[name = tensor<string, []>("op_7461_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7460_cast_fp16 = transpose(perm = var_7460_perm_0, x = k_55_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_19 = split(axis = var_7461_axis_0, split_sizes = tile_82, x = var_7460_cast_fp16)[name = tensor<string, []>("op_7461_cast_fp16")];
+            tensor<int32, [20]> tile_83 = const()[name = tensor<string, []>("tile_83"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7482_axis_0 = const()[name = tensor<string, []>("op_7482_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_19 = split(axis = var_7482_axis_0, split_sizes = tile_83, x = var_7436_cast_fp16)[name = tensor<string, []>("op_7482_cast_fp16")];
+            tensor<string, []> aw_1081_equation_0 = const()[name = tensor<string, []>("aw_1081_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1081_cast_fp16 = einsum(equation = aw_1081_equation_0, values = (var_7461_cast_fp16_0, var_7439_cast_fp16_0))[name = tensor<string, []>("aw_1081_cast_fp16")];
+            tensor<string, []> aw_1083_equation_0 = const()[name = tensor<string, []>("aw_1083_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1083_cast_fp16 = einsum(equation = aw_1083_equation_0, values = (var_7461_cast_fp16_1, var_7439_cast_fp16_1))[name = tensor<string, []>("aw_1083_cast_fp16")];
+            tensor<string, []> aw_1085_equation_0 = const()[name = tensor<string, []>("aw_1085_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1085_cast_fp16 = einsum(equation = aw_1085_equation_0, values = (var_7461_cast_fp16_2, var_7439_cast_fp16_2))[name = tensor<string, []>("aw_1085_cast_fp16")];
+            tensor<string, []> aw_1087_equation_0 = const()[name = tensor<string, []>("aw_1087_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1087_cast_fp16 = einsum(equation = aw_1087_equation_0, values = (var_7461_cast_fp16_3, var_7439_cast_fp16_3))[name = tensor<string, []>("aw_1087_cast_fp16")];
+            tensor<string, []> aw_1089_equation_0 = const()[name = tensor<string, []>("aw_1089_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1089_cast_fp16 = einsum(equation = aw_1089_equation_0, values = (var_7461_cast_fp16_4, var_7439_cast_fp16_4))[name = tensor<string, []>("aw_1089_cast_fp16")];
+            tensor<string, []> aw_1091_equation_0 = const()[name = tensor<string, []>("aw_1091_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1091_cast_fp16 = einsum(equation = aw_1091_equation_0, values = (var_7461_cast_fp16_5, var_7439_cast_fp16_5))[name = tensor<string, []>("aw_1091_cast_fp16")];
+            tensor<string, []> aw_1093_equation_0 = const()[name = tensor<string, []>("aw_1093_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1093_cast_fp16 = einsum(equation = aw_1093_equation_0, values = (var_7461_cast_fp16_6, var_7439_cast_fp16_6))[name = tensor<string, []>("aw_1093_cast_fp16")];
+            tensor<string, []> aw_1095_equation_0 = const()[name = tensor<string, []>("aw_1095_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1095_cast_fp16 = einsum(equation = aw_1095_equation_0, values = (var_7461_cast_fp16_7, var_7439_cast_fp16_7))[name = tensor<string, []>("aw_1095_cast_fp16")];
+            tensor<string, []> aw_1097_equation_0 = const()[name = tensor<string, []>("aw_1097_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1097_cast_fp16 = einsum(equation = aw_1097_equation_0, values = (var_7461_cast_fp16_8, var_7439_cast_fp16_8))[name = tensor<string, []>("aw_1097_cast_fp16")];
+            tensor<string, []> aw_1099_equation_0 = const()[name = tensor<string, []>("aw_1099_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1099_cast_fp16 = einsum(equation = aw_1099_equation_0, values = (var_7461_cast_fp16_9, var_7439_cast_fp16_9))[name = tensor<string, []>("aw_1099_cast_fp16")];
+            tensor<string, []> aw_1101_equation_0 = const()[name = tensor<string, []>("aw_1101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1101_cast_fp16 = einsum(equation = aw_1101_equation_0, values = (var_7461_cast_fp16_10, var_7439_cast_fp16_10))[name = tensor<string, []>("aw_1101_cast_fp16")];
+            tensor<string, []> aw_1103_equation_0 = const()[name = tensor<string, []>("aw_1103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1103_cast_fp16 = einsum(equation = aw_1103_equation_0, values = (var_7461_cast_fp16_11, var_7439_cast_fp16_11))[name = tensor<string, []>("aw_1103_cast_fp16")];
+            tensor<string, []> aw_1105_equation_0 = const()[name = tensor<string, []>("aw_1105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1105_cast_fp16 = einsum(equation = aw_1105_equation_0, values = (var_7461_cast_fp16_12, var_7439_cast_fp16_12))[name = tensor<string, []>("aw_1105_cast_fp16")];
+            tensor<string, []> aw_1107_equation_0 = const()[name = tensor<string, []>("aw_1107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1107_cast_fp16 = einsum(equation = aw_1107_equation_0, values = (var_7461_cast_fp16_13, var_7439_cast_fp16_13))[name = tensor<string, []>("aw_1107_cast_fp16")];
+            tensor<string, []> aw_1109_equation_0 = const()[name = tensor<string, []>("aw_1109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1109_cast_fp16 = einsum(equation = aw_1109_equation_0, values = (var_7461_cast_fp16_14, var_7439_cast_fp16_14))[name = tensor<string, []>("aw_1109_cast_fp16")];
+            tensor<string, []> aw_1111_equation_0 = const()[name = tensor<string, []>("aw_1111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1111_cast_fp16 = einsum(equation = aw_1111_equation_0, values = (var_7461_cast_fp16_15, var_7439_cast_fp16_15))[name = tensor<string, []>("aw_1111_cast_fp16")];
+            tensor<string, []> aw_1113_equation_0 = const()[name = tensor<string, []>("aw_1113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1113_cast_fp16 = einsum(equation = aw_1113_equation_0, values = (var_7461_cast_fp16_16, var_7439_cast_fp16_16))[name = tensor<string, []>("aw_1113_cast_fp16")];
+            tensor<string, []> aw_1115_equation_0 = const()[name = tensor<string, []>("aw_1115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1115_cast_fp16 = einsum(equation = aw_1115_equation_0, values = (var_7461_cast_fp16_17, var_7439_cast_fp16_17))[name = tensor<string, []>("aw_1115_cast_fp16")];
+            tensor<string, []> aw_1117_equation_0 = const()[name = tensor<string, []>("aw_1117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1117_cast_fp16 = einsum(equation = aw_1117_equation_0, values = (var_7461_cast_fp16_18, var_7439_cast_fp16_18))[name = tensor<string, []>("aw_1117_cast_fp16")];
+            tensor<string, []> aw_1119_equation_0 = const()[name = tensor<string, []>("aw_1119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1119_cast_fp16 = einsum(equation = aw_1119_equation_0, values = (var_7461_cast_fp16_19, var_7439_cast_fp16_19))[name = tensor<string, []>("aw_1119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7543_cast_fp16 = softmax(axis = var_7387, x = aw_1081_cast_fp16)[name = tensor<string, []>("op_7543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7544_cast_fp16 = softmax(axis = var_7387, x = aw_1083_cast_fp16)[name = tensor<string, []>("op_7544_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7545_cast_fp16 = softmax(axis = var_7387, x = aw_1085_cast_fp16)[name = tensor<string, []>("op_7545_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7546_cast_fp16 = softmax(axis = var_7387, x = aw_1087_cast_fp16)[name = tensor<string, []>("op_7546_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7547_cast_fp16 = softmax(axis = var_7387, x = aw_1089_cast_fp16)[name = tensor<string, []>("op_7547_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7548_cast_fp16 = softmax(axis = var_7387, x = aw_1091_cast_fp16)[name = tensor<string, []>("op_7548_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7549_cast_fp16 = softmax(axis = var_7387, x = aw_1093_cast_fp16)[name = tensor<string, []>("op_7549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7550_cast_fp16 = softmax(axis = var_7387, x = aw_1095_cast_fp16)[name = tensor<string, []>("op_7550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7551_cast_fp16 = softmax(axis = var_7387, x = aw_1097_cast_fp16)[name = tensor<string, []>("op_7551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7552_cast_fp16 = softmax(axis = var_7387, x = aw_1099_cast_fp16)[name = tensor<string, []>("op_7552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7553_cast_fp16 = softmax(axis = var_7387, x = aw_1101_cast_fp16)[name = tensor<string, []>("op_7553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7554_cast_fp16 = softmax(axis = var_7387, x = aw_1103_cast_fp16)[name = tensor<string, []>("op_7554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7555_cast_fp16 = softmax(axis = var_7387, x = aw_1105_cast_fp16)[name = tensor<string, []>("op_7555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7556_cast_fp16 = softmax(axis = var_7387, x = aw_1107_cast_fp16)[name = tensor<string, []>("op_7556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7557_cast_fp16 = softmax(axis = var_7387, x = aw_1109_cast_fp16)[name = tensor<string, []>("op_7557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7558_cast_fp16 = softmax(axis = var_7387, x = aw_1111_cast_fp16)[name = tensor<string, []>("op_7558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7559_cast_fp16 = softmax(axis = var_7387, x = aw_1113_cast_fp16)[name = tensor<string, []>("op_7559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7560_cast_fp16 = softmax(axis = var_7387, x = aw_1115_cast_fp16)[name = tensor<string, []>("op_7560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7561_cast_fp16 = softmax(axis = var_7387, x = aw_1117_cast_fp16)[name = tensor<string, []>("op_7561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7562_cast_fp16 = softmax(axis = var_7387, x = aw_1119_cast_fp16)[name = tensor<string, []>("op_7562_cast_fp16")];
+            tensor<string, []> var_7564_equation_0 = const()[name = tensor<string, []>("op_7564_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7564_cast_fp16 = einsum(equation = var_7564_equation_0, values = (var_7482_cast_fp16_0, var_7543_cast_fp16))[name = tensor<string, []>("op_7564_cast_fp16")];
+            tensor<string, []> var_7566_equation_0 = const()[name = tensor<string, []>("op_7566_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7566_cast_fp16 = einsum(equation = var_7566_equation_0, values = (var_7482_cast_fp16_1, var_7544_cast_fp16))[name = tensor<string, []>("op_7566_cast_fp16")];
+            tensor<string, []> var_7568_equation_0 = const()[name = tensor<string, []>("op_7568_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7568_cast_fp16 = einsum(equation = var_7568_equation_0, values = (var_7482_cast_fp16_2, var_7545_cast_fp16))[name = tensor<string, []>("op_7568_cast_fp16")];
+            tensor<string, []> var_7570_equation_0 = const()[name = tensor<string, []>("op_7570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7570_cast_fp16 = einsum(equation = var_7570_equation_0, values = (var_7482_cast_fp16_3, var_7546_cast_fp16))[name = tensor<string, []>("op_7570_cast_fp16")];
+            tensor<string, []> var_7572_equation_0 = const()[name = tensor<string, []>("op_7572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7572_cast_fp16 = einsum(equation = var_7572_equation_0, values = (var_7482_cast_fp16_4, var_7547_cast_fp16))[name = tensor<string, []>("op_7572_cast_fp16")];
+            tensor<string, []> var_7574_equation_0 = const()[name = tensor<string, []>("op_7574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7574_cast_fp16 = einsum(equation = var_7574_equation_0, values = (var_7482_cast_fp16_5, var_7548_cast_fp16))[name = tensor<string, []>("op_7574_cast_fp16")];
+            tensor<string, []> var_7576_equation_0 = const()[name = tensor<string, []>("op_7576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7576_cast_fp16 = einsum(equation = var_7576_equation_0, values = (var_7482_cast_fp16_6, var_7549_cast_fp16))[name = tensor<string, []>("op_7576_cast_fp16")];
+            tensor<string, []> var_7578_equation_0 = const()[name = tensor<string, []>("op_7578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7578_cast_fp16 = einsum(equation = var_7578_equation_0, values = (var_7482_cast_fp16_7, var_7550_cast_fp16))[name = tensor<string, []>("op_7578_cast_fp16")];
+            tensor<string, []> var_7580_equation_0 = const()[name = tensor<string, []>("op_7580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7580_cast_fp16 = einsum(equation = var_7580_equation_0, values = (var_7482_cast_fp16_8, var_7551_cast_fp16))[name = tensor<string, []>("op_7580_cast_fp16")];
+            tensor<string, []> var_7582_equation_0 = const()[name = tensor<string, []>("op_7582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7582_cast_fp16 = einsum(equation = var_7582_equation_0, values = (var_7482_cast_fp16_9, var_7552_cast_fp16))[name = tensor<string, []>("op_7582_cast_fp16")];
+            tensor<string, []> var_7584_equation_0 = const()[name = tensor<string, []>("op_7584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7584_cast_fp16 = einsum(equation = var_7584_equation_0, values = (var_7482_cast_fp16_10, var_7553_cast_fp16))[name = tensor<string, []>("op_7584_cast_fp16")];
+            tensor<string, []> var_7586_equation_0 = const()[name = tensor<string, []>("op_7586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7586_cast_fp16 = einsum(equation = var_7586_equation_0, values = (var_7482_cast_fp16_11, var_7554_cast_fp16))[name = tensor<string, []>("op_7586_cast_fp16")];
+            tensor<string, []> var_7588_equation_0 = const()[name = tensor<string, []>("op_7588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7588_cast_fp16 = einsum(equation = var_7588_equation_0, values = (var_7482_cast_fp16_12, var_7555_cast_fp16))[name = tensor<string, []>("op_7588_cast_fp16")];
+            tensor<string, []> var_7590_equation_0 = const()[name = tensor<string, []>("op_7590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7590_cast_fp16 = einsum(equation = var_7590_equation_0, values = (var_7482_cast_fp16_13, var_7556_cast_fp16))[name = tensor<string, []>("op_7590_cast_fp16")];
+            tensor<string, []> var_7592_equation_0 = const()[name = tensor<string, []>("op_7592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7592_cast_fp16 = einsum(equation = var_7592_equation_0, values = (var_7482_cast_fp16_14, var_7557_cast_fp16))[name = tensor<string, []>("op_7592_cast_fp16")];
+            tensor<string, []> var_7594_equation_0 = const()[name = tensor<string, []>("op_7594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7594_cast_fp16 = einsum(equation = var_7594_equation_0, values = (var_7482_cast_fp16_15, var_7558_cast_fp16))[name = tensor<string, []>("op_7594_cast_fp16")];
+            tensor<string, []> var_7596_equation_0 = const()[name = tensor<string, []>("op_7596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7596_cast_fp16 = einsum(equation = var_7596_equation_0, values = (var_7482_cast_fp16_16, var_7559_cast_fp16))[name = tensor<string, []>("op_7596_cast_fp16")];
+            tensor<string, []> var_7598_equation_0 = const()[name = tensor<string, []>("op_7598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7598_cast_fp16 = einsum(equation = var_7598_equation_0, values = (var_7482_cast_fp16_17, var_7560_cast_fp16))[name = tensor<string, []>("op_7598_cast_fp16")];
+            tensor<string, []> var_7600_equation_0 = const()[name = tensor<string, []>("op_7600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7600_cast_fp16 = einsum(equation = var_7600_equation_0, values = (var_7482_cast_fp16_18, var_7561_cast_fp16))[name = tensor<string, []>("op_7600_cast_fp16")];
+            tensor<string, []> var_7602_equation_0 = const()[name = tensor<string, []>("op_7602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7602_cast_fp16 = einsum(equation = var_7602_equation_0, values = (var_7482_cast_fp16_19, var_7562_cast_fp16))[name = tensor<string, []>("op_7602_cast_fp16")];
+            tensor<bool, []> input_275_interleave_0 = const()[name = tensor<string, []>("input_275_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_275_cast_fp16 = concat(axis = var_7387, interleave = input_275_interleave_0, values = (var_7564_cast_fp16, var_7566_cast_fp16, var_7568_cast_fp16, var_7570_cast_fp16, var_7572_cast_fp16, var_7574_cast_fp16, var_7576_cast_fp16, var_7578_cast_fp16, var_7580_cast_fp16, var_7582_cast_fp16, var_7584_cast_fp16, var_7586_cast_fp16, var_7588_cast_fp16, var_7590_cast_fp16, var_7592_cast_fp16, var_7594_cast_fp16, var_7596_cast_fp16, var_7598_cast_fp16, var_7600_cast_fp16, var_7602_cast_fp16))[name = tensor<string, []>("input_275_cast_fp16")];
+            tensor<string, []> var_7611_pad_type_0 = const()[name = tensor<string, []>("op_7611_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7611_strides_0 = const()[name = tensor<string, []>("op_7611_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7611_pad_0 = const()[name = tensor<string, []>("op_7611_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7611_dilations_0 = const()[name = tensor<string, []>("op_7611_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7611_groups_0 = const()[name = tensor<string, []>("op_7611_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1086669952)))];
+            tensor<fp16, [1280]> blocks_27_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089946816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7611_cast_fp16 = conv(bias = blocks_27_attn_out_bias_to_fp16, dilations = var_7611_dilations_0, groups = var_7611_groups_0, pad = var_7611_pad_0, pad_type = var_7611_pad_type_0, strides = var_7611_strides_0, weight = blocks_27_attn_out_weight_to_fp16, x = input_275_cast_fp16)[name = tensor<string, []>("op_7611_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = var_7611_cast_fp16)[name = tensor<string, []>("inputs_111_cast_fp16")];
+            tensor<int32, [1]> input_277_axes_0 = const()[name = tensor<string, []>("input_277_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_277_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_277_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089949440)))];
+            tensor<fp16, [1280]> input_277_beta_0_to_fp16 = const()[name = tensor<string, []>("input_277_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089952064)))];
+            tensor<fp16, []> var_7621_to_fp16 = const()[name = tensor<string, []>("op_7621_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_277_cast_fp16 = layer_norm(axes = input_277_axes_0, beta = input_277_beta_0_to_fp16, epsilon = var_7621_to_fp16, gamma = input_277_gamma_0_to_fp16, x = inputs_111_cast_fp16)[name = tensor<string, []>("input_277_cast_fp16")];
+            tensor<string, []> input_279_pad_type_0 = const()[name = tensor<string, []>("input_279_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_279_strides_0 = const()[name = tensor<string, []>("input_279_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_279_pad_0 = const()[name = tensor<string, []>("input_279_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_279_dilations_0 = const()[name = tensor<string, []>("input_279_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_279_groups_0 = const()[name = tensor<string, []>("input_279_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_27_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089954688)))];
+            tensor<fp16, [5120]> blocks_27_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103061952)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_279_cast_fp16 = conv(bias = blocks_27_mlp_0_bias_to_fp16, dilations = input_279_dilations_0, groups = input_279_groups_0, pad = input_279_pad_0, pad_type = input_279_pad_type_0, strides = input_279_strides_0, weight = blocks_27_mlp_0_weight_to_fp16, x = input_277_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
+            tensor<string, []> input_281_mode_0 = const()[name = tensor<string, []>("input_281_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_281_cast_fp16 = gelu(mode = input_281_mode_0, x = input_279_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
+            tensor<string, []> var_7647_pad_type_0 = const()[name = tensor<string, []>("op_7647_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7647_strides_0 = const()[name = tensor<string, []>("op_7647_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7647_pad_0 = const()[name = tensor<string, []>("op_7647_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7647_dilations_0 = const()[name = tensor<string, []>("op_7647_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7647_groups_0 = const()[name = tensor<string, []>("op_7647_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_27_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103072256)))];
+            tensor<fp16, [1280]> blocks_27_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116179520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7647_cast_fp16 = conv(bias = blocks_27_mlp_2_bias_to_fp16, dilations = var_7647_dilations_0, groups = var_7647_groups_0, pad = var_7647_pad_0, pad_type = var_7647_pad_type_0, strides = var_7647_strides_0, weight = blocks_27_mlp_2_weight_to_fp16, x = input_281_cast_fp16)[name = tensor<string, []>("op_7647_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = var_7647_cast_fp16)[name = tensor<string, []>("inputs_113_cast_fp16")];
+            tensor<int32, []> var_7656 = const()[name = tensor<string, []>("op_7656"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_283_axes_0 = const()[name = tensor<string, []>("input_283_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_283_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_283_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116182144)))];
+            tensor<fp16, [1280]> input_283_beta_0_to_fp16 = const()[name = tensor<string, []>("input_283_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116184768)))];
+            tensor<fp16, []> var_7672_to_fp16 = const()[name = tensor<string, []>("op_7672_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_283_cast_fp16 = layer_norm(axes = input_283_axes_0, beta = input_283_beta_0_to_fp16, epsilon = var_7672_to_fp16, gamma = input_283_gamma_0_to_fp16, x = inputs_113_cast_fp16)[name = tensor<string, []>("input_283_cast_fp16")];
+            tensor<string, []> q_57_pad_type_0 = const()[name = tensor<string, []>("q_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_57_strides_0 = const()[name = tensor<string, []>("q_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_57_pad_0 = const()[name = tensor<string, []>("q_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_57_dilations_0 = const()[name = tensor<string, []>("q_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_57_groups_0 = const()[name = tensor<string, []>("q_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7707_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7707_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116187392)))];
+            tensor<fp16, [1280]> var_7707_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7707_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119464256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7707_cast_fp16 = conv(bias = var_7707_bias_0_to_fp16, dilations = q_57_dilations_0, groups = q_57_groups_0, pad = q_57_pad_0, pad_type = q_57_pad_type_0, strides = q_57_strides_0, weight = var_7707_weight_0_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7707_cast_fp16")];
+            tensor<string, []> k_57_pad_type_0 = const()[name = tensor<string, []>("k_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_57_strides_0 = const()[name = tensor<string, []>("k_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_57_pad_0 = const()[name = tensor<string, []>("k_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_57_dilations_0 = const()[name = tensor<string, []>("k_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_57_groups_0 = const()[name = tensor<string, []>("k_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119466880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_57_cast_fp16 = conv(dilations = k_57_dilations_0, groups = k_57_groups_0, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = k_57_strides_0, weight = blocks_28_attn_key_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("k_57_cast_fp16")];
+            tensor<string, []> var_7705_pad_type_0 = const()[name = tensor<string, []>("op_7705_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7705_strides_0 = const()[name = tensor<string, []>("op_7705_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7705_pad_0 = const()[name = tensor<string, []>("op_7705_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7705_dilations_0 = const()[name = tensor<string, []>("op_7705_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7705_groups_0 = const()[name = tensor<string, []>("op_7705_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1122743744)))];
+            tensor<fp16, [1280]> blocks_28_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126020608)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7705_cast_fp16 = conv(bias = blocks_28_attn_value_bias_to_fp16, dilations = var_7705_dilations_0, groups = var_7705_groups_0, pad = var_7705_pad_0, pad_type = var_7705_pad_type_0, strides = var_7705_strides_0, weight = blocks_28_attn_value_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7705_cast_fp16")];
+            tensor<int32, [20]> tile_84 = const()[name = tensor<string, []>("tile_84"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7708_axis_0 = const()[name = tensor<string, []>("op_7708_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_19 = split(axis = var_7708_axis_0, split_sizes = tile_84, x = var_7707_cast_fp16)[name = tensor<string, []>("op_7708_cast_fp16")];
+            tensor<int32, [4]> var_7729_perm_0 = const()[name = tensor<string, []>("op_7729_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_85 = const()[name = tensor<string, []>("tile_85"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7730_axis_0 = const()[name = tensor<string, []>("op_7730_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7729_cast_fp16 = transpose(perm = var_7729_perm_0, x = k_57_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_19 = split(axis = var_7730_axis_0, split_sizes = tile_85, x = var_7729_cast_fp16)[name = tensor<string, []>("op_7730_cast_fp16")];
+            tensor<int32, [20]> tile_86 = const()[name = tensor<string, []>("tile_86"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7751_axis_0 = const()[name = tensor<string, []>("op_7751_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_19 = split(axis = var_7751_axis_0, split_sizes = tile_86, x = var_7705_cast_fp16)[name = tensor<string, []>("op_7751_cast_fp16")];
+            tensor<string, []> aw_1121_equation_0 = const()[name = tensor<string, []>("aw_1121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1121_cast_fp16 = einsum(equation = aw_1121_equation_0, values = (var_7730_cast_fp16_0, var_7708_cast_fp16_0))[name = tensor<string, []>("aw_1121_cast_fp16")];
+            tensor<string, []> aw_1123_equation_0 = const()[name = tensor<string, []>("aw_1123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1123_cast_fp16 = einsum(equation = aw_1123_equation_0, values = (var_7730_cast_fp16_1, var_7708_cast_fp16_1))[name = tensor<string, []>("aw_1123_cast_fp16")];
+            tensor<string, []> aw_1125_equation_0 = const()[name = tensor<string, []>("aw_1125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1125_cast_fp16 = einsum(equation = aw_1125_equation_0, values = (var_7730_cast_fp16_2, var_7708_cast_fp16_2))[name = tensor<string, []>("aw_1125_cast_fp16")];
+            tensor<string, []> aw_1127_equation_0 = const()[name = tensor<string, []>("aw_1127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1127_cast_fp16 = einsum(equation = aw_1127_equation_0, values = (var_7730_cast_fp16_3, var_7708_cast_fp16_3))[name = tensor<string, []>("aw_1127_cast_fp16")];
+            tensor<string, []> aw_1129_equation_0 = const()[name = tensor<string, []>("aw_1129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1129_cast_fp16 = einsum(equation = aw_1129_equation_0, values = (var_7730_cast_fp16_4, var_7708_cast_fp16_4))[name = tensor<string, []>("aw_1129_cast_fp16")];
+            tensor<string, []> aw_1131_equation_0 = const()[name = tensor<string, []>("aw_1131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1131_cast_fp16 = einsum(equation = aw_1131_equation_0, values = (var_7730_cast_fp16_5, var_7708_cast_fp16_5))[name = tensor<string, []>("aw_1131_cast_fp16")];
+            tensor<string, []> aw_1133_equation_0 = const()[name = tensor<string, []>("aw_1133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1133_cast_fp16 = einsum(equation = aw_1133_equation_0, values = (var_7730_cast_fp16_6, var_7708_cast_fp16_6))[name = tensor<string, []>("aw_1133_cast_fp16")];
+            tensor<string, []> aw_1135_equation_0 = const()[name = tensor<string, []>("aw_1135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1135_cast_fp16 = einsum(equation = aw_1135_equation_0, values = (var_7730_cast_fp16_7, var_7708_cast_fp16_7))[name = tensor<string, []>("aw_1135_cast_fp16")];
+            tensor<string, []> aw_1137_equation_0 = const()[name = tensor<string, []>("aw_1137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1137_cast_fp16 = einsum(equation = aw_1137_equation_0, values = (var_7730_cast_fp16_8, var_7708_cast_fp16_8))[name = tensor<string, []>("aw_1137_cast_fp16")];
+            tensor<string, []> aw_1139_equation_0 = const()[name = tensor<string, []>("aw_1139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1139_cast_fp16 = einsum(equation = aw_1139_equation_0, values = (var_7730_cast_fp16_9, var_7708_cast_fp16_9))[name = tensor<string, []>("aw_1139_cast_fp16")];
+            tensor<string, []> aw_1141_equation_0 = const()[name = tensor<string, []>("aw_1141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1141_cast_fp16 = einsum(equation = aw_1141_equation_0, values = (var_7730_cast_fp16_10, var_7708_cast_fp16_10))[name = tensor<string, []>("aw_1141_cast_fp16")];
+            tensor<string, []> aw_1143_equation_0 = const()[name = tensor<string, []>("aw_1143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1143_cast_fp16 = einsum(equation = aw_1143_equation_0, values = (var_7730_cast_fp16_11, var_7708_cast_fp16_11))[name = tensor<string, []>("aw_1143_cast_fp16")];
+            tensor<string, []> aw_1145_equation_0 = const()[name = tensor<string, []>("aw_1145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1145_cast_fp16 = einsum(equation = aw_1145_equation_0, values = (var_7730_cast_fp16_12, var_7708_cast_fp16_12))[name = tensor<string, []>("aw_1145_cast_fp16")];
+            tensor<string, []> aw_1147_equation_0 = const()[name = tensor<string, []>("aw_1147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1147_cast_fp16 = einsum(equation = aw_1147_equation_0, values = (var_7730_cast_fp16_13, var_7708_cast_fp16_13))[name = tensor<string, []>("aw_1147_cast_fp16")];
+            tensor<string, []> aw_1149_equation_0 = const()[name = tensor<string, []>("aw_1149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1149_cast_fp16 = einsum(equation = aw_1149_equation_0, values = (var_7730_cast_fp16_14, var_7708_cast_fp16_14))[name = tensor<string, []>("aw_1149_cast_fp16")];
+            tensor<string, []> aw_1151_equation_0 = const()[name = tensor<string, []>("aw_1151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1151_cast_fp16 = einsum(equation = aw_1151_equation_0, values = (var_7730_cast_fp16_15, var_7708_cast_fp16_15))[name = tensor<string, []>("aw_1151_cast_fp16")];
+            tensor<string, []> aw_1153_equation_0 = const()[name = tensor<string, []>("aw_1153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1153_cast_fp16 = einsum(equation = aw_1153_equation_0, values = (var_7730_cast_fp16_16, var_7708_cast_fp16_16))[name = tensor<string, []>("aw_1153_cast_fp16")];
+            tensor<string, []> aw_1155_equation_0 = const()[name = tensor<string, []>("aw_1155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1155_cast_fp16 = einsum(equation = aw_1155_equation_0, values = (var_7730_cast_fp16_17, var_7708_cast_fp16_17))[name = tensor<string, []>("aw_1155_cast_fp16")];
+            tensor<string, []> aw_1157_equation_0 = const()[name = tensor<string, []>("aw_1157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1157_cast_fp16 = einsum(equation = aw_1157_equation_0, values = (var_7730_cast_fp16_18, var_7708_cast_fp16_18))[name = tensor<string, []>("aw_1157_cast_fp16")];
+            tensor<string, []> aw_1159_equation_0 = const()[name = tensor<string, []>("aw_1159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1159_cast_fp16 = einsum(equation = aw_1159_equation_0, values = (var_7730_cast_fp16_19, var_7708_cast_fp16_19))[name = tensor<string, []>("aw_1159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7812_cast_fp16 = softmax(axis = var_7656, x = aw_1121_cast_fp16)[name = tensor<string, []>("op_7812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7813_cast_fp16 = softmax(axis = var_7656, x = aw_1123_cast_fp16)[name = tensor<string, []>("op_7813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7814_cast_fp16 = softmax(axis = var_7656, x = aw_1125_cast_fp16)[name = tensor<string, []>("op_7814_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7815_cast_fp16 = softmax(axis = var_7656, x = aw_1127_cast_fp16)[name = tensor<string, []>("op_7815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7816_cast_fp16 = softmax(axis = var_7656, x = aw_1129_cast_fp16)[name = tensor<string, []>("op_7816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7817_cast_fp16 = softmax(axis = var_7656, x = aw_1131_cast_fp16)[name = tensor<string, []>("op_7817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7818_cast_fp16 = softmax(axis = var_7656, x = aw_1133_cast_fp16)[name = tensor<string, []>("op_7818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7819_cast_fp16 = softmax(axis = var_7656, x = aw_1135_cast_fp16)[name = tensor<string, []>("op_7819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7820_cast_fp16 = softmax(axis = var_7656, x = aw_1137_cast_fp16)[name = tensor<string, []>("op_7820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7821_cast_fp16 = softmax(axis = var_7656, x = aw_1139_cast_fp16)[name = tensor<string, []>("op_7821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7822_cast_fp16 = softmax(axis = var_7656, x = aw_1141_cast_fp16)[name = tensor<string, []>("op_7822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7823_cast_fp16 = softmax(axis = var_7656, x = aw_1143_cast_fp16)[name = tensor<string, []>("op_7823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7824_cast_fp16 = softmax(axis = var_7656, x = aw_1145_cast_fp16)[name = tensor<string, []>("op_7824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7825_cast_fp16 = softmax(axis = var_7656, x = aw_1147_cast_fp16)[name = tensor<string, []>("op_7825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7826_cast_fp16 = softmax(axis = var_7656, x = aw_1149_cast_fp16)[name = tensor<string, []>("op_7826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7827_cast_fp16 = softmax(axis = var_7656, x = aw_1151_cast_fp16)[name = tensor<string, []>("op_7827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7828_cast_fp16 = softmax(axis = var_7656, x = aw_1153_cast_fp16)[name = tensor<string, []>("op_7828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7829_cast_fp16 = softmax(axis = var_7656, x = aw_1155_cast_fp16)[name = tensor<string, []>("op_7829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7830_cast_fp16 = softmax(axis = var_7656, x = aw_1157_cast_fp16)[name = tensor<string, []>("op_7830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7831_cast_fp16 = softmax(axis = var_7656, x = aw_1159_cast_fp16)[name = tensor<string, []>("op_7831_cast_fp16")];
+            tensor<string, []> var_7833_equation_0 = const()[name = tensor<string, []>("op_7833_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7833_cast_fp16 = einsum(equation = var_7833_equation_0, values = (var_7751_cast_fp16_0, var_7812_cast_fp16))[name = tensor<string, []>("op_7833_cast_fp16")];
+            tensor<string, []> var_7835_equation_0 = const()[name = tensor<string, []>("op_7835_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7835_cast_fp16 = einsum(equation = var_7835_equation_0, values = (var_7751_cast_fp16_1, var_7813_cast_fp16))[name = tensor<string, []>("op_7835_cast_fp16")];
+            tensor<string, []> var_7837_equation_0 = const()[name = tensor<string, []>("op_7837_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7837_cast_fp16 = einsum(equation = var_7837_equation_0, values = (var_7751_cast_fp16_2, var_7814_cast_fp16))[name = tensor<string, []>("op_7837_cast_fp16")];
+            tensor<string, []> var_7839_equation_0 = const()[name = tensor<string, []>("op_7839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7839_cast_fp16 = einsum(equation = var_7839_equation_0, values = (var_7751_cast_fp16_3, var_7815_cast_fp16))[name = tensor<string, []>("op_7839_cast_fp16")];
+            tensor<string, []> var_7841_equation_0 = const()[name = tensor<string, []>("op_7841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7841_cast_fp16 = einsum(equation = var_7841_equation_0, values = (var_7751_cast_fp16_4, var_7816_cast_fp16))[name = tensor<string, []>("op_7841_cast_fp16")];
+            tensor<string, []> var_7843_equation_0 = const()[name = tensor<string, []>("op_7843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7843_cast_fp16 = einsum(equation = var_7843_equation_0, values = (var_7751_cast_fp16_5, var_7817_cast_fp16))[name = tensor<string, []>("op_7843_cast_fp16")];
+            tensor<string, []> var_7845_equation_0 = const()[name = tensor<string, []>("op_7845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7845_cast_fp16 = einsum(equation = var_7845_equation_0, values = (var_7751_cast_fp16_6, var_7818_cast_fp16))[name = tensor<string, []>("op_7845_cast_fp16")];
+            tensor<string, []> var_7847_equation_0 = const()[name = tensor<string, []>("op_7847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7847_cast_fp16 = einsum(equation = var_7847_equation_0, values = (var_7751_cast_fp16_7, var_7819_cast_fp16))[name = tensor<string, []>("op_7847_cast_fp16")];
+            tensor<string, []> var_7849_equation_0 = const()[name = tensor<string, []>("op_7849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7849_cast_fp16 = einsum(equation = var_7849_equation_0, values = (var_7751_cast_fp16_8, var_7820_cast_fp16))[name = tensor<string, []>("op_7849_cast_fp16")];
+            tensor<string, []> var_7851_equation_0 = const()[name = tensor<string, []>("op_7851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7851_cast_fp16 = einsum(equation = var_7851_equation_0, values = (var_7751_cast_fp16_9, var_7821_cast_fp16))[name = tensor<string, []>("op_7851_cast_fp16")];
+            tensor<string, []> var_7853_equation_0 = const()[name = tensor<string, []>("op_7853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7853_cast_fp16 = einsum(equation = var_7853_equation_0, values = (var_7751_cast_fp16_10, var_7822_cast_fp16))[name = tensor<string, []>("op_7853_cast_fp16")];
+            tensor<string, []> var_7855_equation_0 = const()[name = tensor<string, []>("op_7855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7855_cast_fp16 = einsum(equation = var_7855_equation_0, values = (var_7751_cast_fp16_11, var_7823_cast_fp16))[name = tensor<string, []>("op_7855_cast_fp16")];
+            tensor<string, []> var_7857_equation_0 = const()[name = tensor<string, []>("op_7857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7857_cast_fp16 = einsum(equation = var_7857_equation_0, values = (var_7751_cast_fp16_12, var_7824_cast_fp16))[name = tensor<string, []>("op_7857_cast_fp16")];
+            tensor<string, []> var_7859_equation_0 = const()[name = tensor<string, []>("op_7859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7859_cast_fp16 = einsum(equation = var_7859_equation_0, values = (var_7751_cast_fp16_13, var_7825_cast_fp16))[name = tensor<string, []>("op_7859_cast_fp16")];
+            tensor<string, []> var_7861_equation_0 = const()[name = tensor<string, []>("op_7861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7861_cast_fp16 = einsum(equation = var_7861_equation_0, values = (var_7751_cast_fp16_14, var_7826_cast_fp16))[name = tensor<string, []>("op_7861_cast_fp16")];
+            tensor<string, []> var_7863_equation_0 = const()[name = tensor<string, []>("op_7863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7863_cast_fp16 = einsum(equation = var_7863_equation_0, values = (var_7751_cast_fp16_15, var_7827_cast_fp16))[name = tensor<string, []>("op_7863_cast_fp16")];
+            tensor<string, []> var_7865_equation_0 = const()[name = tensor<string, []>("op_7865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7865_cast_fp16 = einsum(equation = var_7865_equation_0, values = (var_7751_cast_fp16_16, var_7828_cast_fp16))[name = tensor<string, []>("op_7865_cast_fp16")];
+            tensor<string, []> var_7867_equation_0 = const()[name = tensor<string, []>("op_7867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7867_cast_fp16 = einsum(equation = var_7867_equation_0, values = (var_7751_cast_fp16_17, var_7829_cast_fp16))[name = tensor<string, []>("op_7867_cast_fp16")];
+            tensor<string, []> var_7869_equation_0 = const()[name = tensor<string, []>("op_7869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7869_cast_fp16 = einsum(equation = var_7869_equation_0, values = (var_7751_cast_fp16_18, var_7830_cast_fp16))[name = tensor<string, []>("op_7869_cast_fp16")];
+            tensor<string, []> var_7871_equation_0 = const()[name = tensor<string, []>("op_7871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7871_cast_fp16 = einsum(equation = var_7871_equation_0, values = (var_7751_cast_fp16_19, var_7831_cast_fp16))[name = tensor<string, []>("op_7871_cast_fp16")];
+            tensor<bool, []> input_285_interleave_0 = const()[name = tensor<string, []>("input_285_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_285_cast_fp16 = concat(axis = var_7656, interleave = input_285_interleave_0, values = (var_7833_cast_fp16, var_7835_cast_fp16, var_7837_cast_fp16, var_7839_cast_fp16, var_7841_cast_fp16, var_7843_cast_fp16, var_7845_cast_fp16, var_7847_cast_fp16, var_7849_cast_fp16, var_7851_cast_fp16, var_7853_cast_fp16, var_7855_cast_fp16, var_7857_cast_fp16, var_7859_cast_fp16, var_7861_cast_fp16, var_7863_cast_fp16, var_7865_cast_fp16, var_7867_cast_fp16, var_7869_cast_fp16, var_7871_cast_fp16))[name = tensor<string, []>("input_285_cast_fp16")];
+            tensor<string, []> var_7880_pad_type_0 = const()[name = tensor<string, []>("op_7880_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7880_strides_0 = const()[name = tensor<string, []>("op_7880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7880_pad_0 = const()[name = tensor<string, []>("op_7880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7880_dilations_0 = const()[name = tensor<string, []>("op_7880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7880_groups_0 = const()[name = tensor<string, []>("op_7880_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126023232)))];
+            tensor<fp16, [1280]> blocks_28_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129300096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7880_cast_fp16 = conv(bias = blocks_28_attn_out_bias_to_fp16, dilations = var_7880_dilations_0, groups = var_7880_groups_0, pad = var_7880_pad_0, pad_type = var_7880_pad_type_0, strides = var_7880_strides_0, weight = blocks_28_attn_out_weight_to_fp16, x = input_285_cast_fp16)[name = tensor<string, []>("op_7880_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = var_7880_cast_fp16)[name = tensor<string, []>("inputs_115_cast_fp16")];
+            tensor<int32, [1]> input_287_axes_0 = const()[name = tensor<string, []>("input_287_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_287_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_287_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129302720)))];
+            tensor<fp16, [1280]> input_287_beta_0_to_fp16 = const()[name = tensor<string, []>("input_287_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129305344)))];
+            tensor<fp16, []> var_7890_to_fp16 = const()[name = tensor<string, []>("op_7890_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_287_cast_fp16 = layer_norm(axes = input_287_axes_0, beta = input_287_beta_0_to_fp16, epsilon = var_7890_to_fp16, gamma = input_287_gamma_0_to_fp16, x = inputs_115_cast_fp16)[name = tensor<string, []>("input_287_cast_fp16")];
+            tensor<string, []> input_289_pad_type_0 = const()[name = tensor<string, []>("input_289_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_289_strides_0 = const()[name = tensor<string, []>("input_289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_289_pad_0 = const()[name = tensor<string, []>("input_289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_289_dilations_0 = const()[name = tensor<string, []>("input_289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_289_groups_0 = const()[name = tensor<string, []>("input_289_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_28_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129307968)))];
+            tensor<fp16, [5120]> blocks_28_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142415232)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_289_cast_fp16 = conv(bias = blocks_28_mlp_0_bias_to_fp16, dilations = input_289_dilations_0, groups = input_289_groups_0, pad = input_289_pad_0, pad_type = input_289_pad_type_0, strides = input_289_strides_0, weight = blocks_28_mlp_0_weight_to_fp16, x = input_287_cast_fp16)[name = tensor<string, []>("input_289_cast_fp16")];
+            tensor<string, []> input_291_mode_0 = const()[name = tensor<string, []>("input_291_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_291_cast_fp16 = gelu(mode = input_291_mode_0, x = input_289_cast_fp16)[name = tensor<string, []>("input_291_cast_fp16")];
+            tensor<string, []> var_7916_pad_type_0 = const()[name = tensor<string, []>("op_7916_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7916_strides_0 = const()[name = tensor<string, []>("op_7916_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7916_pad_0 = const()[name = tensor<string, []>("op_7916_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7916_dilations_0 = const()[name = tensor<string, []>("op_7916_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7916_groups_0 = const()[name = tensor<string, []>("op_7916_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_28_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142425536)))];
+            tensor<fp16, [1280]> blocks_28_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155532800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7916_cast_fp16 = conv(bias = blocks_28_mlp_2_bias_to_fp16, dilations = var_7916_dilations_0, groups = var_7916_groups_0, pad = var_7916_pad_0, pad_type = var_7916_pad_type_0, strides = var_7916_strides_0, weight = blocks_28_mlp_2_weight_to_fp16, x = input_291_cast_fp16)[name = tensor<string, []>("op_7916_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = var_7916_cast_fp16)[name = tensor<string, []>("inputs_117_cast_fp16")];
+            tensor<int32, []> var_7925 = const()[name = tensor<string, []>("op_7925"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_293_axes_0 = const()[name = tensor<string, []>("input_293_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_293_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_293_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155535424)))];
+            tensor<fp16, [1280]> input_293_beta_0_to_fp16 = const()[name = tensor<string, []>("input_293_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155538048)))];
+            tensor<fp16, []> var_7941_to_fp16 = const()[name = tensor<string, []>("op_7941_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_293_cast_fp16 = layer_norm(axes = input_293_axes_0, beta = input_293_beta_0_to_fp16, epsilon = var_7941_to_fp16, gamma = input_293_gamma_0_to_fp16, x = inputs_117_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
+            tensor<string, []> q_59_pad_type_0 = const()[name = tensor<string, []>("q_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_59_strides_0 = const()[name = tensor<string, []>("q_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_59_pad_0 = const()[name = tensor<string, []>("q_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_59_dilations_0 = const()[name = tensor<string, []>("q_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_59_groups_0 = const()[name = tensor<string, []>("q_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7976_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7976_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155540672)))];
+            tensor<fp16, [1280]> var_7976_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7976_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1158817536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7976_cast_fp16 = conv(bias = var_7976_bias_0_to_fp16, dilations = q_59_dilations_0, groups = q_59_groups_0, pad = q_59_pad_0, pad_type = q_59_pad_type_0, strides = q_59_strides_0, weight = var_7976_weight_0_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7976_cast_fp16")];
+            tensor<string, []> k_59_pad_type_0 = const()[name = tensor<string, []>("k_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_59_strides_0 = const()[name = tensor<string, []>("k_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_59_pad_0 = const()[name = tensor<string, []>("k_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_59_dilations_0 = const()[name = tensor<string, []>("k_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_59_groups_0 = const()[name = tensor<string, []>("k_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1158820160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_59_cast_fp16 = conv(dilations = k_59_dilations_0, groups = k_59_groups_0, pad = k_59_pad_0, pad_type = k_59_pad_type_0, strides = k_59_strides_0, weight = blocks_29_attn_key_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("k_59_cast_fp16")];
+            tensor<string, []> var_7974_pad_type_0 = const()[name = tensor<string, []>("op_7974_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7974_strides_0 = const()[name = tensor<string, []>("op_7974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7974_pad_0 = const()[name = tensor<string, []>("op_7974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7974_dilations_0 = const()[name = tensor<string, []>("op_7974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7974_groups_0 = const()[name = tensor<string, []>("op_7974_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1162097024)))];
+            tensor<fp16, [1280]> blocks_29_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165373888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7974_cast_fp16 = conv(bias = blocks_29_attn_value_bias_to_fp16, dilations = var_7974_dilations_0, groups = var_7974_groups_0, pad = var_7974_pad_0, pad_type = var_7974_pad_type_0, strides = var_7974_strides_0, weight = blocks_29_attn_value_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7974_cast_fp16")];
+            tensor<int32, [20]> tile_87 = const()[name = tensor<string, []>("tile_87"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7977_axis_0 = const()[name = tensor<string, []>("op_7977_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_19 = split(axis = var_7977_axis_0, split_sizes = tile_87, x = var_7976_cast_fp16)[name = tensor<string, []>("op_7977_cast_fp16")];
+            tensor<int32, [4]> var_7998_perm_0 = const()[name = tensor<string, []>("op_7998_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_88 = const()[name = tensor<string, []>("tile_88"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7999_axis_0 = const()[name = tensor<string, []>("op_7999_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7998_cast_fp16 = transpose(perm = var_7998_perm_0, x = k_59_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_19 = split(axis = var_7999_axis_0, split_sizes = tile_88, x = var_7998_cast_fp16)[name = tensor<string, []>("op_7999_cast_fp16")];
+            tensor<int32, [20]> tile_89 = const()[name = tensor<string, []>("tile_89"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8020_axis_0 = const()[name = tensor<string, []>("op_8020_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_19 = split(axis = var_8020_axis_0, split_sizes = tile_89, x = var_7974_cast_fp16)[name = tensor<string, []>("op_8020_cast_fp16")];
+            tensor<string, []> aw_1161_equation_0 = const()[name = tensor<string, []>("aw_1161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1161_cast_fp16 = einsum(equation = aw_1161_equation_0, values = (var_7999_cast_fp16_0, var_7977_cast_fp16_0))[name = tensor<string, []>("aw_1161_cast_fp16")];
+            tensor<string, []> aw_1163_equation_0 = const()[name = tensor<string, []>("aw_1163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1163_cast_fp16 = einsum(equation = aw_1163_equation_0, values = (var_7999_cast_fp16_1, var_7977_cast_fp16_1))[name = tensor<string, []>("aw_1163_cast_fp16")];
+            tensor<string, []> aw_1165_equation_0 = const()[name = tensor<string, []>("aw_1165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1165_cast_fp16 = einsum(equation = aw_1165_equation_0, values = (var_7999_cast_fp16_2, var_7977_cast_fp16_2))[name = tensor<string, []>("aw_1165_cast_fp16")];
+            tensor<string, []> aw_1167_equation_0 = const()[name = tensor<string, []>("aw_1167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1167_cast_fp16 = einsum(equation = aw_1167_equation_0, values = (var_7999_cast_fp16_3, var_7977_cast_fp16_3))[name = tensor<string, []>("aw_1167_cast_fp16")];
+            tensor<string, []> aw_1169_equation_0 = const()[name = tensor<string, []>("aw_1169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1169_cast_fp16 = einsum(equation = aw_1169_equation_0, values = (var_7999_cast_fp16_4, var_7977_cast_fp16_4))[name = tensor<string, []>("aw_1169_cast_fp16")];
+            tensor<string, []> aw_1171_equation_0 = const()[name = tensor<string, []>("aw_1171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1171_cast_fp16 = einsum(equation = aw_1171_equation_0, values = (var_7999_cast_fp16_5, var_7977_cast_fp16_5))[name = tensor<string, []>("aw_1171_cast_fp16")];
+            tensor<string, []> aw_1173_equation_0 = const()[name = tensor<string, []>("aw_1173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1173_cast_fp16 = einsum(equation = aw_1173_equation_0, values = (var_7999_cast_fp16_6, var_7977_cast_fp16_6))[name = tensor<string, []>("aw_1173_cast_fp16")];
+            tensor<string, []> aw_1175_equation_0 = const()[name = tensor<string, []>("aw_1175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1175_cast_fp16 = einsum(equation = aw_1175_equation_0, values = (var_7999_cast_fp16_7, var_7977_cast_fp16_7))[name = tensor<string, []>("aw_1175_cast_fp16")];
+            tensor<string, []> aw_1177_equation_0 = const()[name = tensor<string, []>("aw_1177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1177_cast_fp16 = einsum(equation = aw_1177_equation_0, values = (var_7999_cast_fp16_8, var_7977_cast_fp16_8))[name = tensor<string, []>("aw_1177_cast_fp16")];
+            tensor<string, []> aw_1179_equation_0 = const()[name = tensor<string, []>("aw_1179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1179_cast_fp16 = einsum(equation = aw_1179_equation_0, values = (var_7999_cast_fp16_9, var_7977_cast_fp16_9))[name = tensor<string, []>("aw_1179_cast_fp16")];
+            tensor<string, []> aw_1181_equation_0 = const()[name = tensor<string, []>("aw_1181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1181_cast_fp16 = einsum(equation = aw_1181_equation_0, values = (var_7999_cast_fp16_10, var_7977_cast_fp16_10))[name = tensor<string, []>("aw_1181_cast_fp16")];
+            tensor<string, []> aw_1183_equation_0 = const()[name = tensor<string, []>("aw_1183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1183_cast_fp16 = einsum(equation = aw_1183_equation_0, values = (var_7999_cast_fp16_11, var_7977_cast_fp16_11))[name = tensor<string, []>("aw_1183_cast_fp16")];
+            tensor<string, []> aw_1185_equation_0 = const()[name = tensor<string, []>("aw_1185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1185_cast_fp16 = einsum(equation = aw_1185_equation_0, values = (var_7999_cast_fp16_12, var_7977_cast_fp16_12))[name = tensor<string, []>("aw_1185_cast_fp16")];
+            tensor<string, []> aw_1187_equation_0 = const()[name = tensor<string, []>("aw_1187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1187_cast_fp16 = einsum(equation = aw_1187_equation_0, values = (var_7999_cast_fp16_13, var_7977_cast_fp16_13))[name = tensor<string, []>("aw_1187_cast_fp16")];
+            tensor<string, []> aw_1189_equation_0 = const()[name = tensor<string, []>("aw_1189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1189_cast_fp16 = einsum(equation = aw_1189_equation_0, values = (var_7999_cast_fp16_14, var_7977_cast_fp16_14))[name = tensor<string, []>("aw_1189_cast_fp16")];
+            tensor<string, []> aw_1191_equation_0 = const()[name = tensor<string, []>("aw_1191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1191_cast_fp16 = einsum(equation = aw_1191_equation_0, values = (var_7999_cast_fp16_15, var_7977_cast_fp16_15))[name = tensor<string, []>("aw_1191_cast_fp16")];
+            tensor<string, []> aw_1193_equation_0 = const()[name = tensor<string, []>("aw_1193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1193_cast_fp16 = einsum(equation = aw_1193_equation_0, values = (var_7999_cast_fp16_16, var_7977_cast_fp16_16))[name = tensor<string, []>("aw_1193_cast_fp16")];
+            tensor<string, []> aw_1195_equation_0 = const()[name = tensor<string, []>("aw_1195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1195_cast_fp16 = einsum(equation = aw_1195_equation_0, values = (var_7999_cast_fp16_17, var_7977_cast_fp16_17))[name = tensor<string, []>("aw_1195_cast_fp16")];
+            tensor<string, []> aw_1197_equation_0 = const()[name = tensor<string, []>("aw_1197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1197_cast_fp16 = einsum(equation = aw_1197_equation_0, values = (var_7999_cast_fp16_18, var_7977_cast_fp16_18))[name = tensor<string, []>("aw_1197_cast_fp16")];
+            tensor<string, []> aw_1199_equation_0 = const()[name = tensor<string, []>("aw_1199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1199_cast_fp16 = einsum(equation = aw_1199_equation_0, values = (var_7999_cast_fp16_19, var_7977_cast_fp16_19))[name = tensor<string, []>("aw_1199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8081_cast_fp16 = softmax(axis = var_7925, x = aw_1161_cast_fp16)[name = tensor<string, []>("op_8081_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8082_cast_fp16 = softmax(axis = var_7925, x = aw_1163_cast_fp16)[name = tensor<string, []>("op_8082_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8083_cast_fp16 = softmax(axis = var_7925, x = aw_1165_cast_fp16)[name = tensor<string, []>("op_8083_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8084_cast_fp16 = softmax(axis = var_7925, x = aw_1167_cast_fp16)[name = tensor<string, []>("op_8084_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8085_cast_fp16 = softmax(axis = var_7925, x = aw_1169_cast_fp16)[name = tensor<string, []>("op_8085_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8086_cast_fp16 = softmax(axis = var_7925, x = aw_1171_cast_fp16)[name = tensor<string, []>("op_8086_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8087_cast_fp16 = softmax(axis = var_7925, x = aw_1173_cast_fp16)[name = tensor<string, []>("op_8087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8088_cast_fp16 = softmax(axis = var_7925, x = aw_1175_cast_fp16)[name = tensor<string, []>("op_8088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8089_cast_fp16 = softmax(axis = var_7925, x = aw_1177_cast_fp16)[name = tensor<string, []>("op_8089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8090_cast_fp16 = softmax(axis = var_7925, x = aw_1179_cast_fp16)[name = tensor<string, []>("op_8090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8091_cast_fp16 = softmax(axis = var_7925, x = aw_1181_cast_fp16)[name = tensor<string, []>("op_8091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8092_cast_fp16 = softmax(axis = var_7925, x = aw_1183_cast_fp16)[name = tensor<string, []>("op_8092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8093_cast_fp16 = softmax(axis = var_7925, x = aw_1185_cast_fp16)[name = tensor<string, []>("op_8093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8094_cast_fp16 = softmax(axis = var_7925, x = aw_1187_cast_fp16)[name = tensor<string, []>("op_8094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8095_cast_fp16 = softmax(axis = var_7925, x = aw_1189_cast_fp16)[name = tensor<string, []>("op_8095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8096_cast_fp16 = softmax(axis = var_7925, x = aw_1191_cast_fp16)[name = tensor<string, []>("op_8096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8097_cast_fp16 = softmax(axis = var_7925, x = aw_1193_cast_fp16)[name = tensor<string, []>("op_8097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8098_cast_fp16 = softmax(axis = var_7925, x = aw_1195_cast_fp16)[name = tensor<string, []>("op_8098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8099_cast_fp16 = softmax(axis = var_7925, x = aw_1197_cast_fp16)[name = tensor<string, []>("op_8099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8100_cast_fp16 = softmax(axis = var_7925, x = aw_1199_cast_fp16)[name = tensor<string, []>("op_8100_cast_fp16")];
+            tensor<string, []> var_8102_equation_0 = const()[name = tensor<string, []>("op_8102_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8102_cast_fp16 = einsum(equation = var_8102_equation_0, values = (var_8020_cast_fp16_0, var_8081_cast_fp16))[name = tensor<string, []>("op_8102_cast_fp16")];
+            tensor<string, []> var_8104_equation_0 = const()[name = tensor<string, []>("op_8104_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8104_cast_fp16 = einsum(equation = var_8104_equation_0, values = (var_8020_cast_fp16_1, var_8082_cast_fp16))[name = tensor<string, []>("op_8104_cast_fp16")];
+            tensor<string, []> var_8106_equation_0 = const()[name = tensor<string, []>("op_8106_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8106_cast_fp16 = einsum(equation = var_8106_equation_0, values = (var_8020_cast_fp16_2, var_8083_cast_fp16))[name = tensor<string, []>("op_8106_cast_fp16")];
+            tensor<string, []> var_8108_equation_0 = const()[name = tensor<string, []>("op_8108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8108_cast_fp16 = einsum(equation = var_8108_equation_0, values = (var_8020_cast_fp16_3, var_8084_cast_fp16))[name = tensor<string, []>("op_8108_cast_fp16")];
+            tensor<string, []> var_8110_equation_0 = const()[name = tensor<string, []>("op_8110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8110_cast_fp16 = einsum(equation = var_8110_equation_0, values = (var_8020_cast_fp16_4, var_8085_cast_fp16))[name = tensor<string, []>("op_8110_cast_fp16")];
+            tensor<string, []> var_8112_equation_0 = const()[name = tensor<string, []>("op_8112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8112_cast_fp16 = einsum(equation = var_8112_equation_0, values = (var_8020_cast_fp16_5, var_8086_cast_fp16))[name = tensor<string, []>("op_8112_cast_fp16")];
+            tensor<string, []> var_8114_equation_0 = const()[name = tensor<string, []>("op_8114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8114_cast_fp16 = einsum(equation = var_8114_equation_0, values = (var_8020_cast_fp16_6, var_8087_cast_fp16))[name = tensor<string, []>("op_8114_cast_fp16")];
+            tensor<string, []> var_8116_equation_0 = const()[name = tensor<string, []>("op_8116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8116_cast_fp16 = einsum(equation = var_8116_equation_0, values = (var_8020_cast_fp16_7, var_8088_cast_fp16))[name = tensor<string, []>("op_8116_cast_fp16")];
+            tensor<string, []> var_8118_equation_0 = const()[name = tensor<string, []>("op_8118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8118_cast_fp16 = einsum(equation = var_8118_equation_0, values = (var_8020_cast_fp16_8, var_8089_cast_fp16))[name = tensor<string, []>("op_8118_cast_fp16")];
+            tensor<string, []> var_8120_equation_0 = const()[name = tensor<string, []>("op_8120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8120_cast_fp16 = einsum(equation = var_8120_equation_0, values = (var_8020_cast_fp16_9, var_8090_cast_fp16))[name = tensor<string, []>("op_8120_cast_fp16")];
+            tensor<string, []> var_8122_equation_0 = const()[name = tensor<string, []>("op_8122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8122_cast_fp16 = einsum(equation = var_8122_equation_0, values = (var_8020_cast_fp16_10, var_8091_cast_fp16))[name = tensor<string, []>("op_8122_cast_fp16")];
+            tensor<string, []> var_8124_equation_0 = const()[name = tensor<string, []>("op_8124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8124_cast_fp16 = einsum(equation = var_8124_equation_0, values = (var_8020_cast_fp16_11, var_8092_cast_fp16))[name = tensor<string, []>("op_8124_cast_fp16")];
+            tensor<string, []> var_8126_equation_0 = const()[name = tensor<string, []>("op_8126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8126_cast_fp16 = einsum(equation = var_8126_equation_0, values = (var_8020_cast_fp16_12, var_8093_cast_fp16))[name = tensor<string, []>("op_8126_cast_fp16")];
+            tensor<string, []> var_8128_equation_0 = const()[name = tensor<string, []>("op_8128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8128_cast_fp16 = einsum(equation = var_8128_equation_0, values = (var_8020_cast_fp16_13, var_8094_cast_fp16))[name = tensor<string, []>("op_8128_cast_fp16")];
+            tensor<string, []> var_8130_equation_0 = const()[name = tensor<string, []>("op_8130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8130_cast_fp16 = einsum(equation = var_8130_equation_0, values = (var_8020_cast_fp16_14, var_8095_cast_fp16))[name = tensor<string, []>("op_8130_cast_fp16")];
+            tensor<string, []> var_8132_equation_0 = const()[name = tensor<string, []>("op_8132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8132_cast_fp16 = einsum(equation = var_8132_equation_0, values = (var_8020_cast_fp16_15, var_8096_cast_fp16))[name = tensor<string, []>("op_8132_cast_fp16")];
+            tensor<string, []> var_8134_equation_0 = const()[name = tensor<string, []>("op_8134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8134_cast_fp16 = einsum(equation = var_8134_equation_0, values = (var_8020_cast_fp16_16, var_8097_cast_fp16))[name = tensor<string, []>("op_8134_cast_fp16")];
+            tensor<string, []> var_8136_equation_0 = const()[name = tensor<string, []>("op_8136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8136_cast_fp16 = einsum(equation = var_8136_equation_0, values = (var_8020_cast_fp16_17, var_8098_cast_fp16))[name = tensor<string, []>("op_8136_cast_fp16")];
+            tensor<string, []> var_8138_equation_0 = const()[name = tensor<string, []>("op_8138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8138_cast_fp16 = einsum(equation = var_8138_equation_0, values = (var_8020_cast_fp16_18, var_8099_cast_fp16))[name = tensor<string, []>("op_8138_cast_fp16")];
+            tensor<string, []> var_8140_equation_0 = const()[name = tensor<string, []>("op_8140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8140_cast_fp16 = einsum(equation = var_8140_equation_0, values = (var_8020_cast_fp16_19, var_8100_cast_fp16))[name = tensor<string, []>("op_8140_cast_fp16")];
+            tensor<bool, []> input_295_interleave_0 = const()[name = tensor<string, []>("input_295_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_295_cast_fp16 = concat(axis = var_7925, interleave = input_295_interleave_0, values = (var_8102_cast_fp16, var_8104_cast_fp16, var_8106_cast_fp16, var_8108_cast_fp16, var_8110_cast_fp16, var_8112_cast_fp16, var_8114_cast_fp16, var_8116_cast_fp16, var_8118_cast_fp16, var_8120_cast_fp16, var_8122_cast_fp16, var_8124_cast_fp16, var_8126_cast_fp16, var_8128_cast_fp16, var_8130_cast_fp16, var_8132_cast_fp16, var_8134_cast_fp16, var_8136_cast_fp16, var_8138_cast_fp16, var_8140_cast_fp16))[name = tensor<string, []>("input_295_cast_fp16")];
+            tensor<string, []> var_8149_pad_type_0 = const()[name = tensor<string, []>("op_8149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8149_strides_0 = const()[name = tensor<string, []>("op_8149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8149_pad_0 = const()[name = tensor<string, []>("op_8149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8149_dilations_0 = const()[name = tensor<string, []>("op_8149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8149_groups_0 = const()[name = tensor<string, []>("op_8149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165376512)))];
+            tensor<fp16, [1280]> blocks_29_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168653376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8149_cast_fp16 = conv(bias = blocks_29_attn_out_bias_to_fp16, dilations = var_8149_dilations_0, groups = var_8149_groups_0, pad = var_8149_pad_0, pad_type = var_8149_pad_type_0, strides = var_8149_strides_0, weight = blocks_29_attn_out_weight_to_fp16, x = input_295_cast_fp16)[name = tensor<string, []>("op_8149_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = var_8149_cast_fp16)[name = tensor<string, []>("inputs_119_cast_fp16")];
+            tensor<int32, [1]> input_297_axes_0 = const()[name = tensor<string, []>("input_297_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_297_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_297_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168656000)))];
+            tensor<fp16, [1280]> input_297_beta_0_to_fp16 = const()[name = tensor<string, []>("input_297_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168658624)))];
+            tensor<fp16, []> var_8159_to_fp16 = const()[name = tensor<string, []>("op_8159_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_297_cast_fp16 = layer_norm(axes = input_297_axes_0, beta = input_297_beta_0_to_fp16, epsilon = var_8159_to_fp16, gamma = input_297_gamma_0_to_fp16, x = inputs_119_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
+            tensor<string, []> input_299_pad_type_0 = const()[name = tensor<string, []>("input_299_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_299_strides_0 = const()[name = tensor<string, []>("input_299_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_299_pad_0 = const()[name = tensor<string, []>("input_299_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_299_dilations_0 = const()[name = tensor<string, []>("input_299_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_299_groups_0 = const()[name = tensor<string, []>("input_299_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_29_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168661248)))];
+            tensor<fp16, [5120]> blocks_29_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1181768512)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_299_cast_fp16 = conv(bias = blocks_29_mlp_0_bias_to_fp16, dilations = input_299_dilations_0, groups = input_299_groups_0, pad = input_299_pad_0, pad_type = input_299_pad_type_0, strides = input_299_strides_0, weight = blocks_29_mlp_0_weight_to_fp16, x = input_297_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
+            tensor<string, []> input_301_mode_0 = const()[name = tensor<string, []>("input_301_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_301_cast_fp16 = gelu(mode = input_301_mode_0, x = input_299_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
+            tensor<string, []> var_8185_pad_type_0 = const()[name = tensor<string, []>("op_8185_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8185_strides_0 = const()[name = tensor<string, []>("op_8185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8185_pad_0 = const()[name = tensor<string, []>("op_8185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8185_dilations_0 = const()[name = tensor<string, []>("op_8185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8185_groups_0 = const()[name = tensor<string, []>("op_8185_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_29_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1181778816)))];
+            tensor<fp16, [1280]> blocks_29_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194886080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8185_cast_fp16 = conv(bias = blocks_29_mlp_2_bias_to_fp16, dilations = var_8185_dilations_0, groups = var_8185_groups_0, pad = var_8185_pad_0, pad_type = var_8185_pad_type_0, strides = var_8185_strides_0, weight = blocks_29_mlp_2_weight_to_fp16, x = input_301_cast_fp16)[name = tensor<string, []>("op_8185_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = var_8185_cast_fp16)[name = tensor<string, []>("inputs_121_cast_fp16")];
+            tensor<int32, []> var_8194 = const()[name = tensor<string, []>("op_8194"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_303_axes_0 = const()[name = tensor<string, []>("input_303_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_303_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_303_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194888704)))];
+            tensor<fp16, [1280]> input_303_beta_0_to_fp16 = const()[name = tensor<string, []>("input_303_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194891328)))];
+            tensor<fp16, []> var_8210_to_fp16 = const()[name = tensor<string, []>("op_8210_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_303_cast_fp16 = layer_norm(axes = input_303_axes_0, beta = input_303_beta_0_to_fp16, epsilon = var_8210_to_fp16, gamma = input_303_gamma_0_to_fp16, x = inputs_121_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
+            tensor<string, []> q_61_pad_type_0 = const()[name = tensor<string, []>("q_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_61_strides_0 = const()[name = tensor<string, []>("q_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_61_pad_0 = const()[name = tensor<string, []>("q_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_61_dilations_0 = const()[name = tensor<string, []>("q_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_61_groups_0 = const()[name = tensor<string, []>("q_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8245_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8245_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194893952)))];
+            tensor<fp16, [1280]> var_8245_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8245_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198170816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8245_cast_fp16 = conv(bias = var_8245_bias_0_to_fp16, dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = var_8245_weight_0_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8245_cast_fp16")];
+            tensor<string, []> k_61_pad_type_0 = const()[name = tensor<string, []>("k_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_61_strides_0 = const()[name = tensor<string, []>("k_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_61_pad_0 = const()[name = tensor<string, []>("k_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_61_dilations_0 = const()[name = tensor<string, []>("k_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_61_groups_0 = const()[name = tensor<string, []>("k_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198173440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_61_cast_fp16 = conv(dilations = k_61_dilations_0, groups = k_61_groups_0, pad = k_61_pad_0, pad_type = k_61_pad_type_0, strides = k_61_strides_0, weight = blocks_30_attn_key_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("k_61_cast_fp16")];
+            tensor<string, []> var_8243_pad_type_0 = const()[name = tensor<string, []>("op_8243_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8243_strides_0 = const()[name = tensor<string, []>("op_8243_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8243_pad_0 = const()[name = tensor<string, []>("op_8243_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8243_dilations_0 = const()[name = tensor<string, []>("op_8243_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8243_groups_0 = const()[name = tensor<string, []>("op_8243_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1201450304)))];
+            tensor<fp16, [1280]> blocks_30_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1204727168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8243_cast_fp16 = conv(bias = blocks_30_attn_value_bias_to_fp16, dilations = var_8243_dilations_0, groups = var_8243_groups_0, pad = var_8243_pad_0, pad_type = var_8243_pad_type_0, strides = var_8243_strides_0, weight = blocks_30_attn_value_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8243_cast_fp16")];
+            tensor<int32, [20]> tile_90 = const()[name = tensor<string, []>("tile_90"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8246_axis_0 = const()[name = tensor<string, []>("op_8246_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_19 = split(axis = var_8246_axis_0, split_sizes = tile_90, x = var_8245_cast_fp16)[name = tensor<string, []>("op_8246_cast_fp16")];
+            tensor<int32, [4]> var_8267_perm_0 = const()[name = tensor<string, []>("op_8267_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_91 = const()[name = tensor<string, []>("tile_91"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8268_axis_0 = const()[name = tensor<string, []>("op_8268_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8267_cast_fp16 = transpose(perm = var_8267_perm_0, x = k_61_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_19 = split(axis = var_8268_axis_0, split_sizes = tile_91, x = var_8267_cast_fp16)[name = tensor<string, []>("op_8268_cast_fp16")];
+            tensor<int32, [20]> tile_92 = const()[name = tensor<string, []>("tile_92"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8289_axis_0 = const()[name = tensor<string, []>("op_8289_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_19 = split(axis = var_8289_axis_0, split_sizes = tile_92, x = var_8243_cast_fp16)[name = tensor<string, []>("op_8289_cast_fp16")];
+            tensor<string, []> aw_1201_equation_0 = const()[name = tensor<string, []>("aw_1201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1201_cast_fp16 = einsum(equation = aw_1201_equation_0, values = (var_8268_cast_fp16_0, var_8246_cast_fp16_0))[name = tensor<string, []>("aw_1201_cast_fp16")];
+            tensor<string, []> aw_1203_equation_0 = const()[name = tensor<string, []>("aw_1203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1203_cast_fp16 = einsum(equation = aw_1203_equation_0, values = (var_8268_cast_fp16_1, var_8246_cast_fp16_1))[name = tensor<string, []>("aw_1203_cast_fp16")];
+            tensor<string, []> aw_1205_equation_0 = const()[name = tensor<string, []>("aw_1205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1205_cast_fp16 = einsum(equation = aw_1205_equation_0, values = (var_8268_cast_fp16_2, var_8246_cast_fp16_2))[name = tensor<string, []>("aw_1205_cast_fp16")];
+            tensor<string, []> aw_1207_equation_0 = const()[name = tensor<string, []>("aw_1207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1207_cast_fp16 = einsum(equation = aw_1207_equation_0, values = (var_8268_cast_fp16_3, var_8246_cast_fp16_3))[name = tensor<string, []>("aw_1207_cast_fp16")];
+            tensor<string, []> aw_1209_equation_0 = const()[name = tensor<string, []>("aw_1209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1209_cast_fp16 = einsum(equation = aw_1209_equation_0, values = (var_8268_cast_fp16_4, var_8246_cast_fp16_4))[name = tensor<string, []>("aw_1209_cast_fp16")];
+            tensor<string, []> aw_1211_equation_0 = const()[name = tensor<string, []>("aw_1211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1211_cast_fp16 = einsum(equation = aw_1211_equation_0, values = (var_8268_cast_fp16_5, var_8246_cast_fp16_5))[name = tensor<string, []>("aw_1211_cast_fp16")];
+            tensor<string, []> aw_1213_equation_0 = const()[name = tensor<string, []>("aw_1213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1213_cast_fp16 = einsum(equation = aw_1213_equation_0, values = (var_8268_cast_fp16_6, var_8246_cast_fp16_6))[name = tensor<string, []>("aw_1213_cast_fp16")];
+            tensor<string, []> aw_1215_equation_0 = const()[name = tensor<string, []>("aw_1215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1215_cast_fp16 = einsum(equation = aw_1215_equation_0, values = (var_8268_cast_fp16_7, var_8246_cast_fp16_7))[name = tensor<string, []>("aw_1215_cast_fp16")];
+            tensor<string, []> aw_1217_equation_0 = const()[name = tensor<string, []>("aw_1217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1217_cast_fp16 = einsum(equation = aw_1217_equation_0, values = (var_8268_cast_fp16_8, var_8246_cast_fp16_8))[name = tensor<string, []>("aw_1217_cast_fp16")];
+            tensor<string, []> aw_1219_equation_0 = const()[name = tensor<string, []>("aw_1219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1219_cast_fp16 = einsum(equation = aw_1219_equation_0, values = (var_8268_cast_fp16_9, var_8246_cast_fp16_9))[name = tensor<string, []>("aw_1219_cast_fp16")];
+            tensor<string, []> aw_1221_equation_0 = const()[name = tensor<string, []>("aw_1221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1221_cast_fp16 = einsum(equation = aw_1221_equation_0, values = (var_8268_cast_fp16_10, var_8246_cast_fp16_10))[name = tensor<string, []>("aw_1221_cast_fp16")];
+            tensor<string, []> aw_1223_equation_0 = const()[name = tensor<string, []>("aw_1223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1223_cast_fp16 = einsum(equation = aw_1223_equation_0, values = (var_8268_cast_fp16_11, var_8246_cast_fp16_11))[name = tensor<string, []>("aw_1223_cast_fp16")];
+            tensor<string, []> aw_1225_equation_0 = const()[name = tensor<string, []>("aw_1225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1225_cast_fp16 = einsum(equation = aw_1225_equation_0, values = (var_8268_cast_fp16_12, var_8246_cast_fp16_12))[name = tensor<string, []>("aw_1225_cast_fp16")];
+            tensor<string, []> aw_1227_equation_0 = const()[name = tensor<string, []>("aw_1227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1227_cast_fp16 = einsum(equation = aw_1227_equation_0, values = (var_8268_cast_fp16_13, var_8246_cast_fp16_13))[name = tensor<string, []>("aw_1227_cast_fp16")];
+            tensor<string, []> aw_1229_equation_0 = const()[name = tensor<string, []>("aw_1229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1229_cast_fp16 = einsum(equation = aw_1229_equation_0, values = (var_8268_cast_fp16_14, var_8246_cast_fp16_14))[name = tensor<string, []>("aw_1229_cast_fp16")];
+            tensor<string, []> aw_1231_equation_0 = const()[name = tensor<string, []>("aw_1231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1231_cast_fp16 = einsum(equation = aw_1231_equation_0, values = (var_8268_cast_fp16_15, var_8246_cast_fp16_15))[name = tensor<string, []>("aw_1231_cast_fp16")];
+            tensor<string, []> aw_1233_equation_0 = const()[name = tensor<string, []>("aw_1233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1233_cast_fp16 = einsum(equation = aw_1233_equation_0, values = (var_8268_cast_fp16_16, var_8246_cast_fp16_16))[name = tensor<string, []>("aw_1233_cast_fp16")];
+            tensor<string, []> aw_1235_equation_0 = const()[name = tensor<string, []>("aw_1235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1235_cast_fp16 = einsum(equation = aw_1235_equation_0, values = (var_8268_cast_fp16_17, var_8246_cast_fp16_17))[name = tensor<string, []>("aw_1235_cast_fp16")];
+            tensor<string, []> aw_1237_equation_0 = const()[name = tensor<string, []>("aw_1237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1237_cast_fp16 = einsum(equation = aw_1237_equation_0, values = (var_8268_cast_fp16_18, var_8246_cast_fp16_18))[name = tensor<string, []>("aw_1237_cast_fp16")];
+            tensor<string, []> aw_1239_equation_0 = const()[name = tensor<string, []>("aw_1239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1239_cast_fp16 = einsum(equation = aw_1239_equation_0, values = (var_8268_cast_fp16_19, var_8246_cast_fp16_19))[name = tensor<string, []>("aw_1239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8350_cast_fp16 = softmax(axis = var_8194, x = aw_1201_cast_fp16)[name = tensor<string, []>("op_8350_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8351_cast_fp16 = softmax(axis = var_8194, x = aw_1203_cast_fp16)[name = tensor<string, []>("op_8351_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8352_cast_fp16 = softmax(axis = var_8194, x = aw_1205_cast_fp16)[name = tensor<string, []>("op_8352_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8353_cast_fp16 = softmax(axis = var_8194, x = aw_1207_cast_fp16)[name = tensor<string, []>("op_8353_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8354_cast_fp16 = softmax(axis = var_8194, x = aw_1209_cast_fp16)[name = tensor<string, []>("op_8354_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8355_cast_fp16 = softmax(axis = var_8194, x = aw_1211_cast_fp16)[name = tensor<string, []>("op_8355_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8356_cast_fp16 = softmax(axis = var_8194, x = aw_1213_cast_fp16)[name = tensor<string, []>("op_8356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8357_cast_fp16 = softmax(axis = var_8194, x = aw_1215_cast_fp16)[name = tensor<string, []>("op_8357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8358_cast_fp16 = softmax(axis = var_8194, x = aw_1217_cast_fp16)[name = tensor<string, []>("op_8358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8359_cast_fp16 = softmax(axis = var_8194, x = aw_1219_cast_fp16)[name = tensor<string, []>("op_8359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8360_cast_fp16 = softmax(axis = var_8194, x = aw_1221_cast_fp16)[name = tensor<string, []>("op_8360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8361_cast_fp16 = softmax(axis = var_8194, x = aw_1223_cast_fp16)[name = tensor<string, []>("op_8361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8362_cast_fp16 = softmax(axis = var_8194, x = aw_1225_cast_fp16)[name = tensor<string, []>("op_8362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8363_cast_fp16 = softmax(axis = var_8194, x = aw_1227_cast_fp16)[name = tensor<string, []>("op_8363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8364_cast_fp16 = softmax(axis = var_8194, x = aw_1229_cast_fp16)[name = tensor<string, []>("op_8364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8365_cast_fp16 = softmax(axis = var_8194, x = aw_1231_cast_fp16)[name = tensor<string, []>("op_8365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8366_cast_fp16 = softmax(axis = var_8194, x = aw_1233_cast_fp16)[name = tensor<string, []>("op_8366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8367_cast_fp16 = softmax(axis = var_8194, x = aw_1235_cast_fp16)[name = tensor<string, []>("op_8367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8368_cast_fp16 = softmax(axis = var_8194, x = aw_1237_cast_fp16)[name = tensor<string, []>("op_8368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8369_cast_fp16 = softmax(axis = var_8194, x = aw_1239_cast_fp16)[name = tensor<string, []>("op_8369_cast_fp16")];
+            tensor<string, []> var_8371_equation_0 = const()[name = tensor<string, []>("op_8371_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8371_cast_fp16 = einsum(equation = var_8371_equation_0, values = (var_8289_cast_fp16_0, var_8350_cast_fp16))[name = tensor<string, []>("op_8371_cast_fp16")];
+            tensor<string, []> var_8373_equation_0 = const()[name = tensor<string, []>("op_8373_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8373_cast_fp16 = einsum(equation = var_8373_equation_0, values = (var_8289_cast_fp16_1, var_8351_cast_fp16))[name = tensor<string, []>("op_8373_cast_fp16")];
+            tensor<string, []> var_8375_equation_0 = const()[name = tensor<string, []>("op_8375_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8375_cast_fp16 = einsum(equation = var_8375_equation_0, values = (var_8289_cast_fp16_2, var_8352_cast_fp16))[name = tensor<string, []>("op_8375_cast_fp16")];
+            tensor<string, []> var_8377_equation_0 = const()[name = tensor<string, []>("op_8377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8377_cast_fp16 = einsum(equation = var_8377_equation_0, values = (var_8289_cast_fp16_3, var_8353_cast_fp16))[name = tensor<string, []>("op_8377_cast_fp16")];
+            tensor<string, []> var_8379_equation_0 = const()[name = tensor<string, []>("op_8379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8379_cast_fp16 = einsum(equation = var_8379_equation_0, values = (var_8289_cast_fp16_4, var_8354_cast_fp16))[name = tensor<string, []>("op_8379_cast_fp16")];
+            tensor<string, []> var_8381_equation_0 = const()[name = tensor<string, []>("op_8381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8381_cast_fp16 = einsum(equation = var_8381_equation_0, values = (var_8289_cast_fp16_5, var_8355_cast_fp16))[name = tensor<string, []>("op_8381_cast_fp16")];
+            tensor<string, []> var_8383_equation_0 = const()[name = tensor<string, []>("op_8383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8383_cast_fp16 = einsum(equation = var_8383_equation_0, values = (var_8289_cast_fp16_6, var_8356_cast_fp16))[name = tensor<string, []>("op_8383_cast_fp16")];
+            tensor<string, []> var_8385_equation_0 = const()[name = tensor<string, []>("op_8385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8385_cast_fp16 = einsum(equation = var_8385_equation_0, values = (var_8289_cast_fp16_7, var_8357_cast_fp16))[name = tensor<string, []>("op_8385_cast_fp16")];
+            tensor<string, []> var_8387_equation_0 = const()[name = tensor<string, []>("op_8387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8387_cast_fp16 = einsum(equation = var_8387_equation_0, values = (var_8289_cast_fp16_8, var_8358_cast_fp16))[name = tensor<string, []>("op_8387_cast_fp16")];
+            tensor<string, []> var_8389_equation_0 = const()[name = tensor<string, []>("op_8389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8389_cast_fp16 = einsum(equation = var_8389_equation_0, values = (var_8289_cast_fp16_9, var_8359_cast_fp16))[name = tensor<string, []>("op_8389_cast_fp16")];
+            tensor<string, []> var_8391_equation_0 = const()[name = tensor<string, []>("op_8391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8391_cast_fp16 = einsum(equation = var_8391_equation_0, values = (var_8289_cast_fp16_10, var_8360_cast_fp16))[name = tensor<string, []>("op_8391_cast_fp16")];
+            tensor<string, []> var_8393_equation_0 = const()[name = tensor<string, []>("op_8393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8393_cast_fp16 = einsum(equation = var_8393_equation_0, values = (var_8289_cast_fp16_11, var_8361_cast_fp16))[name = tensor<string, []>("op_8393_cast_fp16")];
+            tensor<string, []> var_8395_equation_0 = const()[name = tensor<string, []>("op_8395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8395_cast_fp16 = einsum(equation = var_8395_equation_0, values = (var_8289_cast_fp16_12, var_8362_cast_fp16))[name = tensor<string, []>("op_8395_cast_fp16")];
+            tensor<string, []> var_8397_equation_0 = const()[name = tensor<string, []>("op_8397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8397_cast_fp16 = einsum(equation = var_8397_equation_0, values = (var_8289_cast_fp16_13, var_8363_cast_fp16))[name = tensor<string, []>("op_8397_cast_fp16")];
+            tensor<string, []> var_8399_equation_0 = const()[name = tensor<string, []>("op_8399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8399_cast_fp16 = einsum(equation = var_8399_equation_0, values = (var_8289_cast_fp16_14, var_8364_cast_fp16))[name = tensor<string, []>("op_8399_cast_fp16")];
+            tensor<string, []> var_8401_equation_0 = const()[name = tensor<string, []>("op_8401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8401_cast_fp16 = einsum(equation = var_8401_equation_0, values = (var_8289_cast_fp16_15, var_8365_cast_fp16))[name = tensor<string, []>("op_8401_cast_fp16")];
+            tensor<string, []> var_8403_equation_0 = const()[name = tensor<string, []>("op_8403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8403_cast_fp16 = einsum(equation = var_8403_equation_0, values = (var_8289_cast_fp16_16, var_8366_cast_fp16))[name = tensor<string, []>("op_8403_cast_fp16")];
+            tensor<string, []> var_8405_equation_0 = const()[name = tensor<string, []>("op_8405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8405_cast_fp16 = einsum(equation = var_8405_equation_0, values = (var_8289_cast_fp16_17, var_8367_cast_fp16))[name = tensor<string, []>("op_8405_cast_fp16")];
+            tensor<string, []> var_8407_equation_0 = const()[name = tensor<string, []>("op_8407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8407_cast_fp16 = einsum(equation = var_8407_equation_0, values = (var_8289_cast_fp16_18, var_8368_cast_fp16))[name = tensor<string, []>("op_8407_cast_fp16")];
+            tensor<string, []> var_8409_equation_0 = const()[name = tensor<string, []>("op_8409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8409_cast_fp16 = einsum(equation = var_8409_equation_0, values = (var_8289_cast_fp16_19, var_8369_cast_fp16))[name = tensor<string, []>("op_8409_cast_fp16")];
+            tensor<bool, []> input_305_interleave_0 = const()[name = tensor<string, []>("input_305_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_305_cast_fp16 = concat(axis = var_8194, interleave = input_305_interleave_0, values = (var_8371_cast_fp16, var_8373_cast_fp16, var_8375_cast_fp16, var_8377_cast_fp16, var_8379_cast_fp16, var_8381_cast_fp16, var_8383_cast_fp16, var_8385_cast_fp16, var_8387_cast_fp16, var_8389_cast_fp16, var_8391_cast_fp16, var_8393_cast_fp16, var_8395_cast_fp16, var_8397_cast_fp16, var_8399_cast_fp16, var_8401_cast_fp16, var_8403_cast_fp16, var_8405_cast_fp16, var_8407_cast_fp16, var_8409_cast_fp16))[name = tensor<string, []>("input_305_cast_fp16")];
+            tensor<string, []> var_8418_pad_type_0 = const()[name = tensor<string, []>("op_8418_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8418_strides_0 = const()[name = tensor<string, []>("op_8418_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8418_pad_0 = const()[name = tensor<string, []>("op_8418_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8418_dilations_0 = const()[name = tensor<string, []>("op_8418_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8418_groups_0 = const()[name = tensor<string, []>("op_8418_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1204729792)))];
+            tensor<fp16, [1280]> blocks_30_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208006656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8418_cast_fp16 = conv(bias = blocks_30_attn_out_bias_to_fp16, dilations = var_8418_dilations_0, groups = var_8418_groups_0, pad = var_8418_pad_0, pad_type = var_8418_pad_type_0, strides = var_8418_strides_0, weight = blocks_30_attn_out_weight_to_fp16, x = input_305_cast_fp16)[name = tensor<string, []>("op_8418_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = var_8418_cast_fp16)[name = tensor<string, []>("inputs_123_cast_fp16")];
+            tensor<int32, [1]> input_307_axes_0 = const()[name = tensor<string, []>("input_307_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_307_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_307_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208009280)))];
+            tensor<fp16, [1280]> input_307_beta_0_to_fp16 = const()[name = tensor<string, []>("input_307_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208011904)))];
+            tensor<fp16, []> var_8428_to_fp16 = const()[name = tensor<string, []>("op_8428_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_307_cast_fp16 = layer_norm(axes = input_307_axes_0, beta = input_307_beta_0_to_fp16, epsilon = var_8428_to_fp16, gamma = input_307_gamma_0_to_fp16, x = inputs_123_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
+            tensor<string, []> input_309_pad_type_0 = const()[name = tensor<string, []>("input_309_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_309_strides_0 = const()[name = tensor<string, []>("input_309_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_309_pad_0 = const()[name = tensor<string, []>("input_309_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_309_dilations_0 = const()[name = tensor<string, []>("input_309_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_309_groups_0 = const()[name = tensor<string, []>("input_309_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_30_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208014528)))];
+            tensor<fp16, [5120]> blocks_30_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221121792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_309_cast_fp16 = conv(bias = blocks_30_mlp_0_bias_to_fp16, dilations = input_309_dilations_0, groups = input_309_groups_0, pad = input_309_pad_0, pad_type = input_309_pad_type_0, strides = input_309_strides_0, weight = blocks_30_mlp_0_weight_to_fp16, x = input_307_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
+            tensor<string, []> input_311_mode_0 = const()[name = tensor<string, []>("input_311_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_311_cast_fp16 = gelu(mode = input_311_mode_0, x = input_309_cast_fp16)[name = tensor<string, []>("input_311_cast_fp16")];
+            tensor<string, []> var_8454_pad_type_0 = const()[name = tensor<string, []>("op_8454_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8454_strides_0 = const()[name = tensor<string, []>("op_8454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8454_pad_0 = const()[name = tensor<string, []>("op_8454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8454_dilations_0 = const()[name = tensor<string, []>("op_8454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8454_groups_0 = const()[name = tensor<string, []>("op_8454_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_30_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221132096)))];
+            tensor<fp16, [1280]> blocks_30_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234239360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8454_cast_fp16 = conv(bias = blocks_30_mlp_2_bias_to_fp16, dilations = var_8454_dilations_0, groups = var_8454_groups_0, pad = var_8454_pad_0, pad_type = var_8454_pad_type_0, strides = var_8454_strides_0, weight = blocks_30_mlp_2_weight_to_fp16, x = input_311_cast_fp16)[name = tensor<string, []>("op_8454_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = var_8454_cast_fp16)[name = tensor<string, []>("inputs_125_cast_fp16")];
+            tensor<int32, []> var_8463 = const()[name = tensor<string, []>("op_8463"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_313_axes_0 = const()[name = tensor<string, []>("input_313_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_313_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_313_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234241984)))];
+            tensor<fp16, [1280]> input_313_beta_0_to_fp16 = const()[name = tensor<string, []>("input_313_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234244608)))];
+            tensor<fp16, []> var_8479_to_fp16 = const()[name = tensor<string, []>("op_8479_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_313_cast_fp16 = layer_norm(axes = input_313_axes_0, beta = input_313_beta_0_to_fp16, epsilon = var_8479_to_fp16, gamma = input_313_gamma_0_to_fp16, x = inputs_125_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8514_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8514_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234247232)))];
+            tensor<fp16, [1280]> var_8514_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8514_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237524096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8514_cast_fp16 = conv(bias = var_8514_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_8514_weight_0_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8514_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237526720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_31_attn_key_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_8512_pad_type_0 = const()[name = tensor<string, []>("op_8512_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8512_strides_0 = const()[name = tensor<string, []>("op_8512_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8512_pad_0 = const()[name = tensor<string, []>("op_8512_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8512_dilations_0 = const()[name = tensor<string, []>("op_8512_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8512_groups_0 = const()[name = tensor<string, []>("op_8512_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1240803584)))];
+            tensor<fp16, [1280]> blocks_31_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244080448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8512_cast_fp16 = conv(bias = blocks_31_attn_value_bias_to_fp16, dilations = var_8512_dilations_0, groups = var_8512_groups_0, pad = var_8512_pad_0, pad_type = var_8512_pad_type_0, strides = var_8512_strides_0, weight = blocks_31_attn_value_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8512_cast_fp16")];
+            tensor<int32, [20]> tile_93 = const()[name = tensor<string, []>("tile_93"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8515_axis_0 = const()[name = tensor<string, []>("op_8515_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_19 = split(axis = var_8515_axis_0, split_sizes = tile_93, x = var_8514_cast_fp16)[name = tensor<string, []>("op_8515_cast_fp16")];
+            tensor<int32, [4]> var_8536_perm_0 = const()[name = tensor<string, []>("op_8536_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_94 = const()[name = tensor<string, []>("tile_94"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8537_axis_0 = const()[name = tensor<string, []>("op_8537_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8536_cast_fp16 = transpose(perm = var_8536_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_19 = split(axis = var_8537_axis_0, split_sizes = tile_94, x = var_8536_cast_fp16)[name = tensor<string, []>("op_8537_cast_fp16")];
+            tensor<int32, [20]> tile_95 = const()[name = tensor<string, []>("tile_95"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8558_axis_0 = const()[name = tensor<string, []>("op_8558_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_19 = split(axis = var_8558_axis_0, split_sizes = tile_95, x = var_8512_cast_fp16)[name = tensor<string, []>("op_8558_cast_fp16")];
+            tensor<string, []> aw_1241_equation_0 = const()[name = tensor<string, []>("aw_1241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1241_cast_fp16 = einsum(equation = aw_1241_equation_0, values = (var_8537_cast_fp16_0, var_8515_cast_fp16_0))[name = tensor<string, []>("aw_1241_cast_fp16")];
+            tensor<string, []> aw_1243_equation_0 = const()[name = tensor<string, []>("aw_1243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1243_cast_fp16 = einsum(equation = aw_1243_equation_0, values = (var_8537_cast_fp16_1, var_8515_cast_fp16_1))[name = tensor<string, []>("aw_1243_cast_fp16")];
+            tensor<string, []> aw_1245_equation_0 = const()[name = tensor<string, []>("aw_1245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1245_cast_fp16 = einsum(equation = aw_1245_equation_0, values = (var_8537_cast_fp16_2, var_8515_cast_fp16_2))[name = tensor<string, []>("aw_1245_cast_fp16")];
+            tensor<string, []> aw_1247_equation_0 = const()[name = tensor<string, []>("aw_1247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1247_cast_fp16 = einsum(equation = aw_1247_equation_0, values = (var_8537_cast_fp16_3, var_8515_cast_fp16_3))[name = tensor<string, []>("aw_1247_cast_fp16")];
+            tensor<string, []> aw_1249_equation_0 = const()[name = tensor<string, []>("aw_1249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1249_cast_fp16 = einsum(equation = aw_1249_equation_0, values = (var_8537_cast_fp16_4, var_8515_cast_fp16_4))[name = tensor<string, []>("aw_1249_cast_fp16")];
+            tensor<string, []> aw_1251_equation_0 = const()[name = tensor<string, []>("aw_1251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1251_cast_fp16 = einsum(equation = aw_1251_equation_0, values = (var_8537_cast_fp16_5, var_8515_cast_fp16_5))[name = tensor<string, []>("aw_1251_cast_fp16")];
+            tensor<string, []> aw_1253_equation_0 = const()[name = tensor<string, []>("aw_1253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1253_cast_fp16 = einsum(equation = aw_1253_equation_0, values = (var_8537_cast_fp16_6, var_8515_cast_fp16_6))[name = tensor<string, []>("aw_1253_cast_fp16")];
+            tensor<string, []> aw_1255_equation_0 = const()[name = tensor<string, []>("aw_1255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1255_cast_fp16 = einsum(equation = aw_1255_equation_0, values = (var_8537_cast_fp16_7, var_8515_cast_fp16_7))[name = tensor<string, []>("aw_1255_cast_fp16")];
+            tensor<string, []> aw_1257_equation_0 = const()[name = tensor<string, []>("aw_1257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1257_cast_fp16 = einsum(equation = aw_1257_equation_0, values = (var_8537_cast_fp16_8, var_8515_cast_fp16_8))[name = tensor<string, []>("aw_1257_cast_fp16")];
+            tensor<string, []> aw_1259_equation_0 = const()[name = tensor<string, []>("aw_1259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1259_cast_fp16 = einsum(equation = aw_1259_equation_0, values = (var_8537_cast_fp16_9, var_8515_cast_fp16_9))[name = tensor<string, []>("aw_1259_cast_fp16")];
+            tensor<string, []> aw_1261_equation_0 = const()[name = tensor<string, []>("aw_1261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1261_cast_fp16 = einsum(equation = aw_1261_equation_0, values = (var_8537_cast_fp16_10, var_8515_cast_fp16_10))[name = tensor<string, []>("aw_1261_cast_fp16")];
+            tensor<string, []> aw_1263_equation_0 = const()[name = tensor<string, []>("aw_1263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1263_cast_fp16 = einsum(equation = aw_1263_equation_0, values = (var_8537_cast_fp16_11, var_8515_cast_fp16_11))[name = tensor<string, []>("aw_1263_cast_fp16")];
+            tensor<string, []> aw_1265_equation_0 = const()[name = tensor<string, []>("aw_1265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1265_cast_fp16 = einsum(equation = aw_1265_equation_0, values = (var_8537_cast_fp16_12, var_8515_cast_fp16_12))[name = tensor<string, []>("aw_1265_cast_fp16")];
+            tensor<string, []> aw_1267_equation_0 = const()[name = tensor<string, []>("aw_1267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1267_cast_fp16 = einsum(equation = aw_1267_equation_0, values = (var_8537_cast_fp16_13, var_8515_cast_fp16_13))[name = tensor<string, []>("aw_1267_cast_fp16")];
+            tensor<string, []> aw_1269_equation_0 = const()[name = tensor<string, []>("aw_1269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1269_cast_fp16 = einsum(equation = aw_1269_equation_0, values = (var_8537_cast_fp16_14, var_8515_cast_fp16_14))[name = tensor<string, []>("aw_1269_cast_fp16")];
+            tensor<string, []> aw_1271_equation_0 = const()[name = tensor<string, []>("aw_1271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1271_cast_fp16 = einsum(equation = aw_1271_equation_0, values = (var_8537_cast_fp16_15, var_8515_cast_fp16_15))[name = tensor<string, []>("aw_1271_cast_fp16")];
+            tensor<string, []> aw_1273_equation_0 = const()[name = tensor<string, []>("aw_1273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1273_cast_fp16 = einsum(equation = aw_1273_equation_0, values = (var_8537_cast_fp16_16, var_8515_cast_fp16_16))[name = tensor<string, []>("aw_1273_cast_fp16")];
+            tensor<string, []> aw_1275_equation_0 = const()[name = tensor<string, []>("aw_1275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1275_cast_fp16 = einsum(equation = aw_1275_equation_0, values = (var_8537_cast_fp16_17, var_8515_cast_fp16_17))[name = tensor<string, []>("aw_1275_cast_fp16")];
+            tensor<string, []> aw_1277_equation_0 = const()[name = tensor<string, []>("aw_1277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1277_cast_fp16 = einsum(equation = aw_1277_equation_0, values = (var_8537_cast_fp16_18, var_8515_cast_fp16_18))[name = tensor<string, []>("aw_1277_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_8537_cast_fp16_19, var_8515_cast_fp16_19))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8619_cast_fp16 = softmax(axis = var_8463, x = aw_1241_cast_fp16)[name = tensor<string, []>("op_8619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8620_cast_fp16 = softmax(axis = var_8463, x = aw_1243_cast_fp16)[name = tensor<string, []>("op_8620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8621_cast_fp16 = softmax(axis = var_8463, x = aw_1245_cast_fp16)[name = tensor<string, []>("op_8621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8622_cast_fp16 = softmax(axis = var_8463, x = aw_1247_cast_fp16)[name = tensor<string, []>("op_8622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8623_cast_fp16 = softmax(axis = var_8463, x = aw_1249_cast_fp16)[name = tensor<string, []>("op_8623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8624_cast_fp16 = softmax(axis = var_8463, x = aw_1251_cast_fp16)[name = tensor<string, []>("op_8624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8625_cast_fp16 = softmax(axis = var_8463, x = aw_1253_cast_fp16)[name = tensor<string, []>("op_8625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8626_cast_fp16 = softmax(axis = var_8463, x = aw_1255_cast_fp16)[name = tensor<string, []>("op_8626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8627_cast_fp16 = softmax(axis = var_8463, x = aw_1257_cast_fp16)[name = tensor<string, []>("op_8627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8628_cast_fp16 = softmax(axis = var_8463, x = aw_1259_cast_fp16)[name = tensor<string, []>("op_8628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8629_cast_fp16 = softmax(axis = var_8463, x = aw_1261_cast_fp16)[name = tensor<string, []>("op_8629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8630_cast_fp16 = softmax(axis = var_8463, x = aw_1263_cast_fp16)[name = tensor<string, []>("op_8630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8631_cast_fp16 = softmax(axis = var_8463, x = aw_1265_cast_fp16)[name = tensor<string, []>("op_8631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8632_cast_fp16 = softmax(axis = var_8463, x = aw_1267_cast_fp16)[name = tensor<string, []>("op_8632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8633_cast_fp16 = softmax(axis = var_8463, x = aw_1269_cast_fp16)[name = tensor<string, []>("op_8633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8634_cast_fp16 = softmax(axis = var_8463, x = aw_1271_cast_fp16)[name = tensor<string, []>("op_8634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8635_cast_fp16 = softmax(axis = var_8463, x = aw_1273_cast_fp16)[name = tensor<string, []>("op_8635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8636_cast_fp16 = softmax(axis = var_8463, x = aw_1275_cast_fp16)[name = tensor<string, []>("op_8636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8637_cast_fp16 = softmax(axis = var_8463, x = aw_1277_cast_fp16)[name = tensor<string, []>("op_8637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8638_cast_fp16 = softmax(axis = var_8463, x = aw_cast_fp16)[name = tensor<string, []>("op_8638_cast_fp16")];
+            tensor<string, []> var_8640_equation_0 = const()[name = tensor<string, []>("op_8640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8640_cast_fp16 = einsum(equation = var_8640_equation_0, values = (var_8558_cast_fp16_0, var_8619_cast_fp16))[name = tensor<string, []>("op_8640_cast_fp16")];
+            tensor<string, []> var_8642_equation_0 = const()[name = tensor<string, []>("op_8642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8642_cast_fp16 = einsum(equation = var_8642_equation_0, values = (var_8558_cast_fp16_1, var_8620_cast_fp16))[name = tensor<string, []>("op_8642_cast_fp16")];
+            tensor<string, []> var_8644_equation_0 = const()[name = tensor<string, []>("op_8644_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8644_cast_fp16 = einsum(equation = var_8644_equation_0, values = (var_8558_cast_fp16_2, var_8621_cast_fp16))[name = tensor<string, []>("op_8644_cast_fp16")];
+            tensor<string, []> var_8646_equation_0 = const()[name = tensor<string, []>("op_8646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8646_cast_fp16 = einsum(equation = var_8646_equation_0, values = (var_8558_cast_fp16_3, var_8622_cast_fp16))[name = tensor<string, []>("op_8646_cast_fp16")];
+            tensor<string, []> var_8648_equation_0 = const()[name = tensor<string, []>("op_8648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8648_cast_fp16 = einsum(equation = var_8648_equation_0, values = (var_8558_cast_fp16_4, var_8623_cast_fp16))[name = tensor<string, []>("op_8648_cast_fp16")];
+            tensor<string, []> var_8650_equation_0 = const()[name = tensor<string, []>("op_8650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8650_cast_fp16 = einsum(equation = var_8650_equation_0, values = (var_8558_cast_fp16_5, var_8624_cast_fp16))[name = tensor<string, []>("op_8650_cast_fp16")];
+            tensor<string, []> var_8652_equation_0 = const()[name = tensor<string, []>("op_8652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8652_cast_fp16 = einsum(equation = var_8652_equation_0, values = (var_8558_cast_fp16_6, var_8625_cast_fp16))[name = tensor<string, []>("op_8652_cast_fp16")];
+            tensor<string, []> var_8654_equation_0 = const()[name = tensor<string, []>("op_8654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8654_cast_fp16 = einsum(equation = var_8654_equation_0, values = (var_8558_cast_fp16_7, var_8626_cast_fp16))[name = tensor<string, []>("op_8654_cast_fp16")];
+            tensor<string, []> var_8656_equation_0 = const()[name = tensor<string, []>("op_8656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8656_cast_fp16 = einsum(equation = var_8656_equation_0, values = (var_8558_cast_fp16_8, var_8627_cast_fp16))[name = tensor<string, []>("op_8656_cast_fp16")];
+            tensor<string, []> var_8658_equation_0 = const()[name = tensor<string, []>("op_8658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8658_cast_fp16 = einsum(equation = var_8658_equation_0, values = (var_8558_cast_fp16_9, var_8628_cast_fp16))[name = tensor<string, []>("op_8658_cast_fp16")];
+            tensor<string, []> var_8660_equation_0 = const()[name = tensor<string, []>("op_8660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8660_cast_fp16 = einsum(equation = var_8660_equation_0, values = (var_8558_cast_fp16_10, var_8629_cast_fp16))[name = tensor<string, []>("op_8660_cast_fp16")];
+            tensor<string, []> var_8662_equation_0 = const()[name = tensor<string, []>("op_8662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8662_cast_fp16 = einsum(equation = var_8662_equation_0, values = (var_8558_cast_fp16_11, var_8630_cast_fp16))[name = tensor<string, []>("op_8662_cast_fp16")];
+            tensor<string, []> var_8664_equation_0 = const()[name = tensor<string, []>("op_8664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8664_cast_fp16 = einsum(equation = var_8664_equation_0, values = (var_8558_cast_fp16_12, var_8631_cast_fp16))[name = tensor<string, []>("op_8664_cast_fp16")];
+            tensor<string, []> var_8666_equation_0 = const()[name = tensor<string, []>("op_8666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8666_cast_fp16 = einsum(equation = var_8666_equation_0, values = (var_8558_cast_fp16_13, var_8632_cast_fp16))[name = tensor<string, []>("op_8666_cast_fp16")];
+            tensor<string, []> var_8668_equation_0 = const()[name = tensor<string, []>("op_8668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8668_cast_fp16 = einsum(equation = var_8668_equation_0, values = (var_8558_cast_fp16_14, var_8633_cast_fp16))[name = tensor<string, []>("op_8668_cast_fp16")];
+            tensor<string, []> var_8670_equation_0 = const()[name = tensor<string, []>("op_8670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8670_cast_fp16 = einsum(equation = var_8670_equation_0, values = (var_8558_cast_fp16_15, var_8634_cast_fp16))[name = tensor<string, []>("op_8670_cast_fp16")];
+            tensor<string, []> var_8672_equation_0 = const()[name = tensor<string, []>("op_8672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8672_cast_fp16 = einsum(equation = var_8672_equation_0, values = (var_8558_cast_fp16_16, var_8635_cast_fp16))[name = tensor<string, []>("op_8672_cast_fp16")];
+            tensor<string, []> var_8674_equation_0 = const()[name = tensor<string, []>("op_8674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8674_cast_fp16 = einsum(equation = var_8674_equation_0, values = (var_8558_cast_fp16_17, var_8636_cast_fp16))[name = tensor<string, []>("op_8674_cast_fp16")];
+            tensor<string, []> var_8676_equation_0 = const()[name = tensor<string, []>("op_8676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8676_cast_fp16 = einsum(equation = var_8676_equation_0, values = (var_8558_cast_fp16_18, var_8637_cast_fp16))[name = tensor<string, []>("op_8676_cast_fp16")];
+            tensor<string, []> var_8678_equation_0 = const()[name = tensor<string, []>("op_8678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8678_cast_fp16 = einsum(equation = var_8678_equation_0, values = (var_8558_cast_fp16_19, var_8638_cast_fp16))[name = tensor<string, []>("op_8678_cast_fp16")];
+            tensor<bool, []> input_315_interleave_0 = const()[name = tensor<string, []>("input_315_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_315_cast_fp16 = concat(axis = var_8463, interleave = input_315_interleave_0, values = (var_8640_cast_fp16, var_8642_cast_fp16, var_8644_cast_fp16, var_8646_cast_fp16, var_8648_cast_fp16, var_8650_cast_fp16, var_8652_cast_fp16, var_8654_cast_fp16, var_8656_cast_fp16, var_8658_cast_fp16, var_8660_cast_fp16, var_8662_cast_fp16, var_8664_cast_fp16, var_8666_cast_fp16, var_8668_cast_fp16, var_8670_cast_fp16, var_8672_cast_fp16, var_8674_cast_fp16, var_8676_cast_fp16, var_8678_cast_fp16))[name = tensor<string, []>("input_315_cast_fp16")];
+            tensor<string, []> var_8687_pad_type_0 = const()[name = tensor<string, []>("op_8687_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8687_strides_0 = const()[name = tensor<string, []>("op_8687_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8687_pad_0 = const()[name = tensor<string, []>("op_8687_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8687_dilations_0 = const()[name = tensor<string, []>("op_8687_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8687_groups_0 = const()[name = tensor<string, []>("op_8687_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244083072)))];
+            tensor<fp16, [1280]> blocks_31_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247359936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8687_cast_fp16 = conv(bias = blocks_31_attn_out_bias_to_fp16, dilations = var_8687_dilations_0, groups = var_8687_groups_0, pad = var_8687_pad_0, pad_type = var_8687_pad_type_0, strides = var_8687_strides_0, weight = blocks_31_attn_out_weight_to_fp16, x = input_315_cast_fp16)[name = tensor<string, []>("op_8687_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = var_8687_cast_fp16)[name = tensor<string, []>("inputs_127_cast_fp16")];
+            tensor<int32, [1]> input_317_axes_0 = const()[name = tensor<string, []>("input_317_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_317_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_317_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247362560)))];
+            tensor<fp16, [1280]> input_317_beta_0_to_fp16 = const()[name = tensor<string, []>("input_317_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247365184)))];
+            tensor<fp16, []> var_8697_to_fp16 = const()[name = tensor<string, []>("op_8697_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_317_cast_fp16 = layer_norm(axes = input_317_axes_0, beta = input_317_beta_0_to_fp16, epsilon = var_8697_to_fp16, gamma = input_317_gamma_0_to_fp16, x = inputs_127_cast_fp16)[name = tensor<string, []>("input_317_cast_fp16")];
+            tensor<string, []> input_319_pad_type_0 = const()[name = tensor<string, []>("input_319_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_319_strides_0 = const()[name = tensor<string, []>("input_319_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_319_pad_0 = const()[name = tensor<string, []>("input_319_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_319_dilations_0 = const()[name = tensor<string, []>("input_319_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_319_groups_0 = const()[name = tensor<string, []>("input_319_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_31_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247367808)))];
+            tensor<fp16, [5120]> blocks_31_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260475072)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_319_cast_fp16 = conv(bias = blocks_31_mlp_0_bias_to_fp16, dilations = input_319_dilations_0, groups = input_319_groups_0, pad = input_319_pad_0, pad_type = input_319_pad_type_0, strides = input_319_strides_0, weight = blocks_31_mlp_0_weight_to_fp16, x = input_317_cast_fp16)[name = tensor<string, []>("input_319_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_319_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_8723_pad_type_0 = const()[name = tensor<string, []>("op_8723_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8723_strides_0 = const()[name = tensor<string, []>("op_8723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8723_pad_0 = const()[name = tensor<string, []>("op_8723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8723_dilations_0 = const()[name = tensor<string, []>("op_8723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8723_groups_0 = const()[name = tensor<string, []>("op_8723_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_31_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260485376)))];
+            tensor<fp16, [1280]> blocks_31_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273592640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8723_cast_fp16 = conv(bias = blocks_31_mlp_2_bias_to_fp16, dilations = var_8723_dilations_0, groups = var_8723_groups_0, pad = var_8723_pad_0, pad_type = var_8723_pad_type_0, strides = var_8723_strides_0, weight = blocks_31_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_8723_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = var_8723_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273595264)))];
+            tensor<fp16, [1280]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273597888)))];
+            tensor<fp16, []> var_8737_to_fp16 = const()[name = tensor<string, []>("op_8737_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_8737_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_8748_axes_0 = const()[name = tensor<string, []>("op_8748_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1500]> var_8748_cast_fp16 = squeeze(axes = var_8748_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_8748_cast_fp16")];
+            tensor<int32, [3]> var_8751_perm_0 = const()[name = tensor<string, []>("op_8751_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_8751_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_8751_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 1280]> var_8751_cast_fp16 = transpose(perm = var_8751_perm_0, x = var_8748_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 1280]> output = cast(dtype = var_8751_cast_fp16_to_fp32_dtype_0, x = var_8751_cast_fp16)[name = tensor<string, []>("cast_131")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/large-v1/ggml-large-v1-encoder.mlmodelc/weights/weight.bin b/large-v1/ggml-large-v1-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2a379fed2102fb3298baa3bb24316bb02adb9800
--- /dev/null
+++ b/large-v1/ggml-large-v1-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac5cd426142b4d97debcc46d2f536ae257ad80cc168790faf2ad3dcc862462a6
+size 1273600512
diff --git a/large-v1/ggml-large-v1.bin b/large-v1/ggml-large-v1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..047db73ddfd7789113dfc94c20b22bc1d044586f
--- /dev/null
+++ b/large-v1/ggml-large-v1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d99f41a10525d0206bddadd86760181fa920438b6b33237e3118ff6c83bb53d
+size 3094623691
diff --git a/large-v2/.DS_Store b/large-v2/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..931d2f48b3a38feb68e3c2dd9805e9f49f666377
Binary files /dev/null and b/large-v2/.DS_Store differ
diff --git a/large-v2/ggml-large-v2-encoder.mlmodelc/analytics/coremldata.bin b/large-v2/ggml-large-v2-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6dc984a46d0a814f3f51e1b9a397639a2cae2e9e
--- /dev/null
+++ b/large-v2/ggml-large-v2-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab6e6c98293af2c12bf584cdda19a464709cef895b408c568445cd111d206b9
+size 243
diff --git a/large-v2/ggml-large-v2-encoder.mlmodelc/coremldata.bin b/large-v2/ggml-large-v2-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b1e529c8edbd7450c061f5421f9af2c3995ecfa3
--- /dev/null
+++ b/large-v2/ggml-large-v2-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e10fcaa761a372b098ec16f7dabc66ca8334e6970a99cffba37006dd5f8decb
+size 320
diff --git a/large-v2/ggml-large-v2-encoder.mlmodelc/metadata.json b/large-v2/ggml-large-v2-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..dc3b811679306c0d52d790854aa514c6a0efe9ca
--- /dev/null
+++ b/large-v2/ggml-large-v2-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 32,
+      "Gelu" : 34,
+      "LayerNorm" : 65,
+      "Transpose" : 33,
+      "Softmax" : 640,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 65,
+      "Einsum" : 1280,
+      "ExpandDims" : 1,
+      "Split" : 96,
+      "Conv" : 194
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_large_v2",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v2/ggml-large-v2-encoder.mlmodelc/model.mil b/large-v2/ggml-large-v2-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..8ab8cd9e6674b7e4122ebb511e9de5d9978c2bb6
--- /dev/null
+++ b/large-v2/ggml-large-v2-encoder.mlmodelc/model.mil
@@ -0,0 +1,5643 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_84_pad_type_0 = const()[name = tensor<string, []>("op_84_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_84_pad_0 = const()[name = tensor<string, []>("op_84_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_84_strides_0 = const()[name = tensor<string, []>("op_84_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_84_dilations_0 = const()[name = tensor<string, []>("op_84_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_84_groups_0 = const()[name = tensor<string, []>("op_84_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1280, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [1280, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1280]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614528)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_132")];
+            tensor<fp16, [1, 1280, 3000]> var_84_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_84_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_102_pad_type_0 = const()[name = tensor<string, []>("op_102_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_102_pad_0 = const()[name = tensor<string, []>("op_102_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_102_strides_0 = const()[name = tensor<string, []>("op_102_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_102_dilations_0 = const()[name = tensor<string, []>("op_102_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_102_groups_0 = const()[name = tensor<string, []>("op_102_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1280, 1280, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617152)))];
+            tensor<fp16, [1280]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10447616)))];
+            tensor<fp16, [1, 1280, 1500]> var_102_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_102_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [1280, 1500]> var_107_to_fp16 = const()[name = tensor<string, []>("op_107_to_fp16"), val = tensor<fp16, [1280, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10450240)))];
+            tensor<fp16, [1, 1280, 1500]> var_109_cast_fp16 = add(x = x_3_cast_fp16, y = var_107_to_fp16)[name = tensor<string, []>("op_109_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_109_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14290304)))];
+            tensor<fp16, [1280]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14292928)))];
+            tensor<fp16, []> var_140_to_fp16 = const()[name = tensor<string, []>("op_140_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_140_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_175_weight_0_to_fp16 = const()[name = tensor<string, []>("op_175_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14295552)))];
+            tensor<fp16, [1280]> var_175_bias_0_to_fp16 = const()[name = tensor<string, []>("op_175_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17572416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_175_cast_fp16 = conv(bias = var_175_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_175_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17575040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_173_pad_type_0 = const()[name = tensor<string, []>("op_173_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_173_strides_0 = const()[name = tensor<string, []>("op_173_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_173_pad_0 = const()[name = tensor<string, []>("op_173_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_173_dilations_0 = const()[name = tensor<string, []>("op_173_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_173_groups_0 = const()[name = tensor<string, []>("op_173_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20851904)))];
+            tensor<fp16, [1280]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24128768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_173_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_173_dilations_0, groups = var_173_groups_0, pad = var_173_pad_0, pad_type = var_173_pad_type_0, strides = var_173_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_173_cast_fp16")];
+            tensor<int32, [20]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_176_axis_0 = const()[name = tensor<string, []>("op_176_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_19 = split(axis = var_176_axis_0, split_sizes = tile_0, x = var_175_cast_fp16)[name = tensor<string, []>("op_176_cast_fp16")];
+            tensor<int32, [4]> var_197_perm_0 = const()[name = tensor<string, []>("op_197_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_198_axis_0 = const()[name = tensor<string, []>("op_198_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_197_cast_fp16 = transpose(perm = var_197_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_32")];
+            tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_19 = split(axis = var_198_axis_0, split_sizes = tile_1, x = var_197_cast_fp16)[name = tensor<string, []>("op_198_cast_fp16")];
+            tensor<int32, [20]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_219_axis_0 = const()[name = tensor<string, []>("op_219_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_19 = split(axis = var_219_axis_0, split_sizes = tile_2, x = var_173_cast_fp16)[name = tensor<string, []>("op_219_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_198_cast_fp16_0, var_176_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_198_cast_fp16_1, var_176_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_198_cast_fp16_2, var_176_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_198_cast_fp16_3, var_176_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_198_cast_fp16_4, var_176_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_198_cast_fp16_5, var_176_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_198_cast_fp16_6, var_176_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_198_cast_fp16_7, var_176_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_198_cast_fp16_8, var_176_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_198_cast_fp16_9, var_176_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_198_cast_fp16_10, var_176_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_198_cast_fp16_11, var_176_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_198_cast_fp16_12, var_176_cast_fp16_12))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_198_cast_fp16_13, var_176_cast_fp16_13))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_198_cast_fp16_14, var_176_cast_fp16_14))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_198_cast_fp16_15, var_176_cast_fp16_15))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_198_cast_fp16_16, var_176_cast_fp16_16))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_198_cast_fp16_17, var_176_cast_fp16_17))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_198_cast_fp16_18, var_176_cast_fp16_18))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_198_cast_fp16_19, var_176_cast_fp16_19))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_280_cast_fp16 = softmax(axis = var_124, x = aw_1_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_281_cast_fp16 = softmax(axis = var_124, x = aw_3_cast_fp16)[name = tensor<string, []>("op_281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_282_cast_fp16 = softmax(axis = var_124, x = aw_5_cast_fp16)[name = tensor<string, []>("op_282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_283_cast_fp16 = softmax(axis = var_124, x = aw_7_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_284_cast_fp16 = softmax(axis = var_124, x = aw_9_cast_fp16)[name = tensor<string, []>("op_284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_285_cast_fp16 = softmax(axis = var_124, x = aw_11_cast_fp16)[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_286_cast_fp16 = softmax(axis = var_124, x = aw_13_cast_fp16)[name = tensor<string, []>("op_286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_287_cast_fp16 = softmax(axis = var_124, x = aw_15_cast_fp16)[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_288_cast_fp16 = softmax(axis = var_124, x = aw_17_cast_fp16)[name = tensor<string, []>("op_288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_289_cast_fp16 = softmax(axis = var_124, x = aw_19_cast_fp16)[name = tensor<string, []>("op_289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_290_cast_fp16 = softmax(axis = var_124, x = aw_21_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_291_cast_fp16 = softmax(axis = var_124, x = aw_23_cast_fp16)[name = tensor<string, []>("op_291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_292_cast_fp16 = softmax(axis = var_124, x = aw_25_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_293_cast_fp16 = softmax(axis = var_124, x = aw_27_cast_fp16)[name = tensor<string, []>("op_293_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_294_cast_fp16 = softmax(axis = var_124, x = aw_29_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_295_cast_fp16 = softmax(axis = var_124, x = aw_31_cast_fp16)[name = tensor<string, []>("op_295_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_296_cast_fp16 = softmax(axis = var_124, x = aw_33_cast_fp16)[name = tensor<string, []>("op_296_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_297_cast_fp16 = softmax(axis = var_124, x = aw_35_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_298_cast_fp16 = softmax(axis = var_124, x = aw_37_cast_fp16)[name = tensor<string, []>("op_298_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_299_cast_fp16 = softmax(axis = var_124, x = aw_39_cast_fp16)[name = tensor<string, []>("op_299_cast_fp16")];
+            tensor<string, []> var_301_equation_0 = const()[name = tensor<string, []>("op_301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_301_cast_fp16 = einsum(equation = var_301_equation_0, values = (var_219_cast_fp16_0, var_280_cast_fp16))[name = tensor<string, []>("op_301_cast_fp16")];
+            tensor<string, []> var_303_equation_0 = const()[name = tensor<string, []>("op_303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_303_cast_fp16 = einsum(equation = var_303_equation_0, values = (var_219_cast_fp16_1, var_281_cast_fp16))[name = tensor<string, []>("op_303_cast_fp16")];
+            tensor<string, []> var_305_equation_0 = const()[name = tensor<string, []>("op_305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_305_cast_fp16 = einsum(equation = var_305_equation_0, values = (var_219_cast_fp16_2, var_282_cast_fp16))[name = tensor<string, []>("op_305_cast_fp16")];
+            tensor<string, []> var_307_equation_0 = const()[name = tensor<string, []>("op_307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_307_cast_fp16 = einsum(equation = var_307_equation_0, values = (var_219_cast_fp16_3, var_283_cast_fp16))[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<string, []> var_309_equation_0 = const()[name = tensor<string, []>("op_309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_309_cast_fp16 = einsum(equation = var_309_equation_0, values = (var_219_cast_fp16_4, var_284_cast_fp16))[name = tensor<string, []>("op_309_cast_fp16")];
+            tensor<string, []> var_311_equation_0 = const()[name = tensor<string, []>("op_311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_311_cast_fp16 = einsum(equation = var_311_equation_0, values = (var_219_cast_fp16_5, var_285_cast_fp16))[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<string, []> var_313_equation_0 = const()[name = tensor<string, []>("op_313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_313_cast_fp16 = einsum(equation = var_313_equation_0, values = (var_219_cast_fp16_6, var_286_cast_fp16))[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<string, []> var_315_equation_0 = const()[name = tensor<string, []>("op_315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_315_cast_fp16 = einsum(equation = var_315_equation_0, values = (var_219_cast_fp16_7, var_287_cast_fp16))[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<string, []> var_317_equation_0 = const()[name = tensor<string, []>("op_317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_317_cast_fp16 = einsum(equation = var_317_equation_0, values = (var_219_cast_fp16_8, var_288_cast_fp16))[name = tensor<string, []>("op_317_cast_fp16")];
+            tensor<string, []> var_319_equation_0 = const()[name = tensor<string, []>("op_319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_319_cast_fp16 = einsum(equation = var_319_equation_0, values = (var_219_cast_fp16_9, var_289_cast_fp16))[name = tensor<string, []>("op_319_cast_fp16")];
+            tensor<string, []> var_321_equation_0 = const()[name = tensor<string, []>("op_321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_321_cast_fp16 = einsum(equation = var_321_equation_0, values = (var_219_cast_fp16_10, var_290_cast_fp16))[name = tensor<string, []>("op_321_cast_fp16")];
+            tensor<string, []> var_323_equation_0 = const()[name = tensor<string, []>("op_323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_323_cast_fp16 = einsum(equation = var_323_equation_0, values = (var_219_cast_fp16_11, var_291_cast_fp16))[name = tensor<string, []>("op_323_cast_fp16")];
+            tensor<string, []> var_325_equation_0 = const()[name = tensor<string, []>("op_325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_325_cast_fp16 = einsum(equation = var_325_equation_0, values = (var_219_cast_fp16_12, var_292_cast_fp16))[name = tensor<string, []>("op_325_cast_fp16")];
+            tensor<string, []> var_327_equation_0 = const()[name = tensor<string, []>("op_327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_327_cast_fp16 = einsum(equation = var_327_equation_0, values = (var_219_cast_fp16_13, var_293_cast_fp16))[name = tensor<string, []>("op_327_cast_fp16")];
+            tensor<string, []> var_329_equation_0 = const()[name = tensor<string, []>("op_329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_329_cast_fp16 = einsum(equation = var_329_equation_0, values = (var_219_cast_fp16_14, var_294_cast_fp16))[name = tensor<string, []>("op_329_cast_fp16")];
+            tensor<string, []> var_331_equation_0 = const()[name = tensor<string, []>("op_331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_331_cast_fp16 = einsum(equation = var_331_equation_0, values = (var_219_cast_fp16_15, var_295_cast_fp16))[name = tensor<string, []>("op_331_cast_fp16")];
+            tensor<string, []> var_333_equation_0 = const()[name = tensor<string, []>("op_333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_333_cast_fp16 = einsum(equation = var_333_equation_0, values = (var_219_cast_fp16_16, var_296_cast_fp16))[name = tensor<string, []>("op_333_cast_fp16")];
+            tensor<string, []> var_335_equation_0 = const()[name = tensor<string, []>("op_335_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_335_cast_fp16 = einsum(equation = var_335_equation_0, values = (var_219_cast_fp16_17, var_297_cast_fp16))[name = tensor<string, []>("op_335_cast_fp16")];
+            tensor<string, []> var_337_equation_0 = const()[name = tensor<string, []>("op_337_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_337_cast_fp16 = einsum(equation = var_337_equation_0, values = (var_219_cast_fp16_18, var_298_cast_fp16))[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<string, []> var_339_equation_0 = const()[name = tensor<string, []>("op_339_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_339_cast_fp16 = einsum(equation = var_339_equation_0, values = (var_219_cast_fp16_19, var_299_cast_fp16))[name = tensor<string, []>("op_339_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_5_cast_fp16 = concat(axis = var_124, interleave = input_5_interleave_0, values = (var_301_cast_fp16, var_303_cast_fp16, var_305_cast_fp16, var_307_cast_fp16, var_309_cast_fp16, var_311_cast_fp16, var_313_cast_fp16, var_315_cast_fp16, var_317_cast_fp16, var_319_cast_fp16, var_321_cast_fp16, var_323_cast_fp16, var_325_cast_fp16, var_327_cast_fp16, var_329_cast_fp16, var_331_cast_fp16, var_333_cast_fp16, var_335_cast_fp16, var_337_cast_fp16, var_339_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_348_pad_type_0 = const()[name = tensor<string, []>("op_348_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_348_strides_0 = const()[name = tensor<string, []>("op_348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_348_pad_0 = const()[name = tensor<string, []>("op_348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_348_dilations_0 = const()[name = tensor<string, []>("op_348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_348_groups_0 = const()[name = tensor<string, []>("op_348_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24131392)))];
+            tensor<fp16, [1280]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27408256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_348_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27410880)))];
+            tensor<fp16, [1280]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27413504)))];
+            tensor<fp16, []> var_358_to_fp16 = const()[name = tensor<string, []>("op_358_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_358_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27416128)))];
+            tensor<fp16, [5120]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40523392)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_384_pad_type_0 = const()[name = tensor<string, []>("op_384_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_384_strides_0 = const()[name = tensor<string, []>("op_384_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_384_pad_0 = const()[name = tensor<string, []>("op_384_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_384_dilations_0 = const()[name = tensor<string, []>("op_384_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_384_groups_0 = const()[name = tensor<string, []>("op_384_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40533696)))];
+            tensor<fp16, [1280]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53640960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_384_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_384_dilations_0, groups = var_384_groups_0, pad = var_384_pad_0, pad_type = var_384_pad_type_0, strides = var_384_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_384_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_384_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_393 = const()[name = tensor<string, []>("op_393"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53643584)))];
+            tensor<fp16, [1280]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53646208)))];
+            tensor<fp16, []> var_409_to_fp16 = const()[name = tensor<string, []>("op_409_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_409_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_444_weight_0_to_fp16 = const()[name = tensor<string, []>("op_444_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53648832)))];
+            tensor<fp16, [1280]> var_444_bias_0_to_fp16 = const()[name = tensor<string, []>("op_444_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56925696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_444_cast_fp16 = conv(bias = var_444_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_444_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_444_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(56928320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_442_pad_type_0 = const()[name = tensor<string, []>("op_442_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_442_strides_0 = const()[name = tensor<string, []>("op_442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_442_pad_0 = const()[name = tensor<string, []>("op_442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_442_dilations_0 = const()[name = tensor<string, []>("op_442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_442_groups_0 = const()[name = tensor<string, []>("op_442_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60205184)))];
+            tensor<fp16, [1280]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63482048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_442_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
+            tensor<int32, [20]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_445_axis_0 = const()[name = tensor<string, []>("op_445_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_19 = split(axis = var_445_axis_0, split_sizes = tile_3, x = var_444_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
+            tensor<int32, [4]> var_466_perm_0 = const()[name = tensor<string, []>("op_466_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_467_axis_0 = const()[name = tensor<string, []>("op_467_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_466_cast_fp16 = transpose(perm = var_466_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_31")];
+            tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_19 = split(axis = var_467_axis_0, split_sizes = tile_4, x = var_466_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
+            tensor<int32, [20]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_488_axis_0 = const()[name = tensor<string, []>("op_488_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_19 = split(axis = var_488_axis_0, split_sizes = tile_5, x = var_442_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_467_cast_fp16_0, var_445_cast_fp16_0))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_467_cast_fp16_1, var_445_cast_fp16_1))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_467_cast_fp16_2, var_445_cast_fp16_2))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_467_cast_fp16_3, var_445_cast_fp16_3))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_467_cast_fp16_4, var_445_cast_fp16_4))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_467_cast_fp16_5, var_445_cast_fp16_5))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_467_cast_fp16_6, var_445_cast_fp16_6))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_467_cast_fp16_7, var_445_cast_fp16_7))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_467_cast_fp16_8, var_445_cast_fp16_8))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_467_cast_fp16_9, var_445_cast_fp16_9))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_467_cast_fp16_10, var_445_cast_fp16_10))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_467_cast_fp16_11, var_445_cast_fp16_11))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_467_cast_fp16_12, var_445_cast_fp16_12))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_467_cast_fp16_13, var_445_cast_fp16_13))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_467_cast_fp16_14, var_445_cast_fp16_14))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_467_cast_fp16_15, var_445_cast_fp16_15))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_467_cast_fp16_16, var_445_cast_fp16_16))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_467_cast_fp16_17, var_445_cast_fp16_17))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_467_cast_fp16_18, var_445_cast_fp16_18))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_467_cast_fp16_19, var_445_cast_fp16_19))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_549_cast_fp16 = softmax(axis = var_393, x = aw_41_cast_fp16)[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_550_cast_fp16 = softmax(axis = var_393, x = aw_43_cast_fp16)[name = tensor<string, []>("op_550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_551_cast_fp16 = softmax(axis = var_393, x = aw_45_cast_fp16)[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_552_cast_fp16 = softmax(axis = var_393, x = aw_47_cast_fp16)[name = tensor<string, []>("op_552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_553_cast_fp16 = softmax(axis = var_393, x = aw_49_cast_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_554_cast_fp16 = softmax(axis = var_393, x = aw_51_cast_fp16)[name = tensor<string, []>("op_554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_555_cast_fp16 = softmax(axis = var_393, x = aw_53_cast_fp16)[name = tensor<string, []>("op_555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_556_cast_fp16 = softmax(axis = var_393, x = aw_55_cast_fp16)[name = tensor<string, []>("op_556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_557_cast_fp16 = softmax(axis = var_393, x = aw_57_cast_fp16)[name = tensor<string, []>("op_557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_558_cast_fp16 = softmax(axis = var_393, x = aw_59_cast_fp16)[name = tensor<string, []>("op_558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_559_cast_fp16 = softmax(axis = var_393, x = aw_61_cast_fp16)[name = tensor<string, []>("op_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_560_cast_fp16 = softmax(axis = var_393, x = aw_63_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_561_cast_fp16 = softmax(axis = var_393, x = aw_65_cast_fp16)[name = tensor<string, []>("op_561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_562_cast_fp16 = softmax(axis = var_393, x = aw_67_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_563_cast_fp16 = softmax(axis = var_393, x = aw_69_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_564_cast_fp16 = softmax(axis = var_393, x = aw_71_cast_fp16)[name = tensor<string, []>("op_564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_565_cast_fp16 = softmax(axis = var_393, x = aw_73_cast_fp16)[name = tensor<string, []>("op_565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_566_cast_fp16 = softmax(axis = var_393, x = aw_75_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_567_cast_fp16 = softmax(axis = var_393, x = aw_77_cast_fp16)[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_568_cast_fp16 = softmax(axis = var_393, x = aw_79_cast_fp16)[name = tensor<string, []>("op_568_cast_fp16")];
+            tensor<string, []> var_570_equation_0 = const()[name = tensor<string, []>("op_570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_570_cast_fp16 = einsum(equation = var_570_equation_0, values = (var_488_cast_fp16_0, var_549_cast_fp16))[name = tensor<string, []>("op_570_cast_fp16")];
+            tensor<string, []> var_572_equation_0 = const()[name = tensor<string, []>("op_572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_572_cast_fp16 = einsum(equation = var_572_equation_0, values = (var_488_cast_fp16_1, var_550_cast_fp16))[name = tensor<string, []>("op_572_cast_fp16")];
+            tensor<string, []> var_574_equation_0 = const()[name = tensor<string, []>("op_574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_574_cast_fp16 = einsum(equation = var_574_equation_0, values = (var_488_cast_fp16_2, var_551_cast_fp16))[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<string, []> var_576_equation_0 = const()[name = tensor<string, []>("op_576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_576_cast_fp16 = einsum(equation = var_576_equation_0, values = (var_488_cast_fp16_3, var_552_cast_fp16))[name = tensor<string, []>("op_576_cast_fp16")];
+            tensor<string, []> var_578_equation_0 = const()[name = tensor<string, []>("op_578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_578_cast_fp16 = einsum(equation = var_578_equation_0, values = (var_488_cast_fp16_4, var_553_cast_fp16))[name = tensor<string, []>("op_578_cast_fp16")];
+            tensor<string, []> var_580_equation_0 = const()[name = tensor<string, []>("op_580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_580_cast_fp16 = einsum(equation = var_580_equation_0, values = (var_488_cast_fp16_5, var_554_cast_fp16))[name = tensor<string, []>("op_580_cast_fp16")];
+            tensor<string, []> var_582_equation_0 = const()[name = tensor<string, []>("op_582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_582_cast_fp16 = einsum(equation = var_582_equation_0, values = (var_488_cast_fp16_6, var_555_cast_fp16))[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<string, []> var_584_equation_0 = const()[name = tensor<string, []>("op_584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_584_cast_fp16 = einsum(equation = var_584_equation_0, values = (var_488_cast_fp16_7, var_556_cast_fp16))[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<string, []> var_586_equation_0 = const()[name = tensor<string, []>("op_586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_586_cast_fp16 = einsum(equation = var_586_equation_0, values = (var_488_cast_fp16_8, var_557_cast_fp16))[name = tensor<string, []>("op_586_cast_fp16")];
+            tensor<string, []> var_588_equation_0 = const()[name = tensor<string, []>("op_588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_588_cast_fp16 = einsum(equation = var_588_equation_0, values = (var_488_cast_fp16_9, var_558_cast_fp16))[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<string, []> var_590_equation_0 = const()[name = tensor<string, []>("op_590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_590_cast_fp16 = einsum(equation = var_590_equation_0, values = (var_488_cast_fp16_10, var_559_cast_fp16))[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<string, []> var_592_equation_0 = const()[name = tensor<string, []>("op_592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_592_cast_fp16 = einsum(equation = var_592_equation_0, values = (var_488_cast_fp16_11, var_560_cast_fp16))[name = tensor<string, []>("op_592_cast_fp16")];
+            tensor<string, []> var_594_equation_0 = const()[name = tensor<string, []>("op_594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_488_cast_fp16_12, var_561_cast_fp16))[name = tensor<string, []>("op_594_cast_fp16")];
+            tensor<string, []> var_596_equation_0 = const()[name = tensor<string, []>("op_596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_596_cast_fp16 = einsum(equation = var_596_equation_0, values = (var_488_cast_fp16_13, var_562_cast_fp16))[name = tensor<string, []>("op_596_cast_fp16")];
+            tensor<string, []> var_598_equation_0 = const()[name = tensor<string, []>("op_598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_488_cast_fp16_14, var_563_cast_fp16))[name = tensor<string, []>("op_598_cast_fp16")];
+            tensor<string, []> var_600_equation_0 = const()[name = tensor<string, []>("op_600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_600_cast_fp16 = einsum(equation = var_600_equation_0, values = (var_488_cast_fp16_15, var_564_cast_fp16))[name = tensor<string, []>("op_600_cast_fp16")];
+            tensor<string, []> var_602_equation_0 = const()[name = tensor<string, []>("op_602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_488_cast_fp16_16, var_565_cast_fp16))[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<string, []> var_604_equation_0 = const()[name = tensor<string, []>("op_604_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_604_cast_fp16 = einsum(equation = var_604_equation_0, values = (var_488_cast_fp16_17, var_566_cast_fp16))[name = tensor<string, []>("op_604_cast_fp16")];
+            tensor<string, []> var_606_equation_0 = const()[name = tensor<string, []>("op_606_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_488_cast_fp16_18, var_567_cast_fp16))[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<string, []> var_608_equation_0 = const()[name = tensor<string, []>("op_608_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_608_cast_fp16 = einsum(equation = var_608_equation_0, values = (var_488_cast_fp16_19, var_568_cast_fp16))[name = tensor<string, []>("op_608_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_15_cast_fp16 = concat(axis = var_393, interleave = input_15_interleave_0, values = (var_570_cast_fp16, var_572_cast_fp16, var_574_cast_fp16, var_576_cast_fp16, var_578_cast_fp16, var_580_cast_fp16, var_582_cast_fp16, var_584_cast_fp16, var_586_cast_fp16, var_588_cast_fp16, var_590_cast_fp16, var_592_cast_fp16, var_594_cast_fp16, var_596_cast_fp16, var_598_cast_fp16, var_600_cast_fp16, var_602_cast_fp16, var_604_cast_fp16, var_606_cast_fp16, var_608_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_617_pad_type_0 = const()[name = tensor<string, []>("op_617_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_617_strides_0 = const()[name = tensor<string, []>("op_617_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_617_pad_0 = const()[name = tensor<string, []>("op_617_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_617_dilations_0 = const()[name = tensor<string, []>("op_617_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_617_groups_0 = const()[name = tensor<string, []>("op_617_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63484672)))];
+            tensor<fp16, [1280]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66761536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_617_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_617_dilations_0, groups = var_617_groups_0, pad = var_617_pad_0, pad_type = var_617_pad_type_0, strides = var_617_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_617_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66764160)))];
+            tensor<fp16, [1280]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66766784)))];
+            tensor<fp16, []> var_627_to_fp16 = const()[name = tensor<string, []>("op_627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_627_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66769408)))];
+            tensor<fp16, [5120]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79876672)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_653_pad_type_0 = const()[name = tensor<string, []>("op_653_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_653_strides_0 = const()[name = tensor<string, []>("op_653_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_653_pad_0 = const()[name = tensor<string, []>("op_653_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_653_dilations_0 = const()[name = tensor<string, []>("op_653_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_653_groups_0 = const()[name = tensor<string, []>("op_653_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79886976)))];
+            tensor<fp16, [1280]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92994240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_653_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_653_dilations_0, groups = var_653_groups_0, pad = var_653_pad_0, pad_type = var_653_pad_type_0, strides = var_653_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_653_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_662 = const()[name = tensor<string, []>("op_662"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92996864)))];
+            tensor<fp16, [1280]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92999488)))];
+            tensor<fp16, []> var_678_to_fp16 = const()[name = tensor<string, []>("op_678_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_678_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_713_weight_0_to_fp16 = const()[name = tensor<string, []>("op_713_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93002112)))];
+            tensor<fp16, [1280]> var_713_bias_0_to_fp16 = const()[name = tensor<string, []>("op_713_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96278976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_713_cast_fp16 = conv(bias = var_713_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_713_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96281600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_711_pad_type_0 = const()[name = tensor<string, []>("op_711_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_711_strides_0 = const()[name = tensor<string, []>("op_711_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_711_pad_0 = const()[name = tensor<string, []>("op_711_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_711_dilations_0 = const()[name = tensor<string, []>("op_711_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_711_groups_0 = const()[name = tensor<string, []>("op_711_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99558464)))];
+            tensor<fp16, [1280]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102835328)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_711_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_711_dilations_0, groups = var_711_groups_0, pad = var_711_pad_0, pad_type = var_711_pad_type_0, strides = var_711_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_711_cast_fp16")];
+            tensor<int32, [20]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_714_axis_0 = const()[name = tensor<string, []>("op_714_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_19 = split(axis = var_714_axis_0, split_sizes = tile_6, x = var_713_cast_fp16)[name = tensor<string, []>("op_714_cast_fp16")];
+            tensor<int32, [4]> var_735_perm_0 = const()[name = tensor<string, []>("op_735_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_736_axis_0 = const()[name = tensor<string, []>("op_736_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_735_cast_fp16 = transpose(perm = var_735_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_30")];
+            tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_19 = split(axis = var_736_axis_0, split_sizes = tile_7, x = var_735_cast_fp16)[name = tensor<string, []>("op_736_cast_fp16")];
+            tensor<int32, [20]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_757_axis_0 = const()[name = tensor<string, []>("op_757_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_19 = split(axis = var_757_axis_0, split_sizes = tile_8, x = var_711_cast_fp16)[name = tensor<string, []>("op_757_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_736_cast_fp16_0, var_714_cast_fp16_0))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_736_cast_fp16_1, var_714_cast_fp16_1))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_736_cast_fp16_2, var_714_cast_fp16_2))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_736_cast_fp16_3, var_714_cast_fp16_3))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_736_cast_fp16_4, var_714_cast_fp16_4))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_736_cast_fp16_5, var_714_cast_fp16_5))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_736_cast_fp16_6, var_714_cast_fp16_6))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_736_cast_fp16_7, var_714_cast_fp16_7))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_736_cast_fp16_8, var_714_cast_fp16_8))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_736_cast_fp16_9, var_714_cast_fp16_9))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_736_cast_fp16_10, var_714_cast_fp16_10))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_736_cast_fp16_11, var_714_cast_fp16_11))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_736_cast_fp16_12, var_714_cast_fp16_12))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_736_cast_fp16_13, var_714_cast_fp16_13))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_736_cast_fp16_14, var_714_cast_fp16_14))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_736_cast_fp16_15, var_714_cast_fp16_15))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_736_cast_fp16_16, var_714_cast_fp16_16))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_736_cast_fp16_17, var_714_cast_fp16_17))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_736_cast_fp16_18, var_714_cast_fp16_18))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_736_cast_fp16_19, var_714_cast_fp16_19))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_818_cast_fp16 = softmax(axis = var_662, x = aw_81_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_819_cast_fp16 = softmax(axis = var_662, x = aw_83_cast_fp16)[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_820_cast_fp16 = softmax(axis = var_662, x = aw_85_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_821_cast_fp16 = softmax(axis = var_662, x = aw_87_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_822_cast_fp16 = softmax(axis = var_662, x = aw_89_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_823_cast_fp16 = softmax(axis = var_662, x = aw_91_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_824_cast_fp16 = softmax(axis = var_662, x = aw_93_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_825_cast_fp16 = softmax(axis = var_662, x = aw_95_cast_fp16)[name = tensor<string, []>("op_825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_826_cast_fp16 = softmax(axis = var_662, x = aw_97_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_827_cast_fp16 = softmax(axis = var_662, x = aw_99_cast_fp16)[name = tensor<string, []>("op_827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_828_cast_fp16 = softmax(axis = var_662, x = aw_101_cast_fp16)[name = tensor<string, []>("op_828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_829_cast_fp16 = softmax(axis = var_662, x = aw_103_cast_fp16)[name = tensor<string, []>("op_829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_830_cast_fp16 = softmax(axis = var_662, x = aw_105_cast_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_831_cast_fp16 = softmax(axis = var_662, x = aw_107_cast_fp16)[name = tensor<string, []>("op_831_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_832_cast_fp16 = softmax(axis = var_662, x = aw_109_cast_fp16)[name = tensor<string, []>("op_832_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_833_cast_fp16 = softmax(axis = var_662, x = aw_111_cast_fp16)[name = tensor<string, []>("op_833_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_834_cast_fp16 = softmax(axis = var_662, x = aw_113_cast_fp16)[name = tensor<string, []>("op_834_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_835_cast_fp16 = softmax(axis = var_662, x = aw_115_cast_fp16)[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_836_cast_fp16 = softmax(axis = var_662, x = aw_117_cast_fp16)[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_837_cast_fp16 = softmax(axis = var_662, x = aw_119_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
+            tensor<string, []> var_839_equation_0 = const()[name = tensor<string, []>("op_839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_839_cast_fp16 = einsum(equation = var_839_equation_0, values = (var_757_cast_fp16_0, var_818_cast_fp16))[name = tensor<string, []>("op_839_cast_fp16")];
+            tensor<string, []> var_841_equation_0 = const()[name = tensor<string, []>("op_841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_841_cast_fp16 = einsum(equation = var_841_equation_0, values = (var_757_cast_fp16_1, var_819_cast_fp16))[name = tensor<string, []>("op_841_cast_fp16")];
+            tensor<string, []> var_843_equation_0 = const()[name = tensor<string, []>("op_843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_843_cast_fp16 = einsum(equation = var_843_equation_0, values = (var_757_cast_fp16_2, var_820_cast_fp16))[name = tensor<string, []>("op_843_cast_fp16")];
+            tensor<string, []> var_845_equation_0 = const()[name = tensor<string, []>("op_845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_845_cast_fp16 = einsum(equation = var_845_equation_0, values = (var_757_cast_fp16_3, var_821_cast_fp16))[name = tensor<string, []>("op_845_cast_fp16")];
+            tensor<string, []> var_847_equation_0 = const()[name = tensor<string, []>("op_847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_847_cast_fp16 = einsum(equation = var_847_equation_0, values = (var_757_cast_fp16_4, var_822_cast_fp16))[name = tensor<string, []>("op_847_cast_fp16")];
+            tensor<string, []> var_849_equation_0 = const()[name = tensor<string, []>("op_849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_849_cast_fp16 = einsum(equation = var_849_equation_0, values = (var_757_cast_fp16_5, var_823_cast_fp16))[name = tensor<string, []>("op_849_cast_fp16")];
+            tensor<string, []> var_851_equation_0 = const()[name = tensor<string, []>("op_851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_851_cast_fp16 = einsum(equation = var_851_equation_0, values = (var_757_cast_fp16_6, var_824_cast_fp16))[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<string, []> var_853_equation_0 = const()[name = tensor<string, []>("op_853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_853_cast_fp16 = einsum(equation = var_853_equation_0, values = (var_757_cast_fp16_7, var_825_cast_fp16))[name = tensor<string, []>("op_853_cast_fp16")];
+            tensor<string, []> var_855_equation_0 = const()[name = tensor<string, []>("op_855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_855_cast_fp16 = einsum(equation = var_855_equation_0, values = (var_757_cast_fp16_8, var_826_cast_fp16))[name = tensor<string, []>("op_855_cast_fp16")];
+            tensor<string, []> var_857_equation_0 = const()[name = tensor<string, []>("op_857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_857_cast_fp16 = einsum(equation = var_857_equation_0, values = (var_757_cast_fp16_9, var_827_cast_fp16))[name = tensor<string, []>("op_857_cast_fp16")];
+            tensor<string, []> var_859_equation_0 = const()[name = tensor<string, []>("op_859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_859_cast_fp16 = einsum(equation = var_859_equation_0, values = (var_757_cast_fp16_10, var_828_cast_fp16))[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<string, []> var_861_equation_0 = const()[name = tensor<string, []>("op_861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_861_cast_fp16 = einsum(equation = var_861_equation_0, values = (var_757_cast_fp16_11, var_829_cast_fp16))[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<string, []> var_863_equation_0 = const()[name = tensor<string, []>("op_863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_863_cast_fp16 = einsum(equation = var_863_equation_0, values = (var_757_cast_fp16_12, var_830_cast_fp16))[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<string, []> var_865_equation_0 = const()[name = tensor<string, []>("op_865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_865_cast_fp16 = einsum(equation = var_865_equation_0, values = (var_757_cast_fp16_13, var_831_cast_fp16))[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<string, []> var_867_equation_0 = const()[name = tensor<string, []>("op_867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_867_cast_fp16 = einsum(equation = var_867_equation_0, values = (var_757_cast_fp16_14, var_832_cast_fp16))[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<string, []> var_869_equation_0 = const()[name = tensor<string, []>("op_869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_869_cast_fp16 = einsum(equation = var_869_equation_0, values = (var_757_cast_fp16_15, var_833_cast_fp16))[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_757_cast_fp16_16, var_834_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<string, []> var_873_equation_0 = const()[name = tensor<string, []>("op_873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_873_cast_fp16 = einsum(equation = var_873_equation_0, values = (var_757_cast_fp16_17, var_835_cast_fp16))[name = tensor<string, []>("op_873_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_757_cast_fp16_18, var_836_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<string, []> var_877_equation_0 = const()[name = tensor<string, []>("op_877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_877_cast_fp16 = einsum(equation = var_877_equation_0, values = (var_757_cast_fp16_19, var_837_cast_fp16))[name = tensor<string, []>("op_877_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_25_cast_fp16 = concat(axis = var_662, interleave = input_25_interleave_0, values = (var_839_cast_fp16, var_841_cast_fp16, var_843_cast_fp16, var_845_cast_fp16, var_847_cast_fp16, var_849_cast_fp16, var_851_cast_fp16, var_853_cast_fp16, var_855_cast_fp16, var_857_cast_fp16, var_859_cast_fp16, var_861_cast_fp16, var_863_cast_fp16, var_865_cast_fp16, var_867_cast_fp16, var_869_cast_fp16, var_871_cast_fp16, var_873_cast_fp16, var_875_cast_fp16, var_877_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_886_pad_type_0 = const()[name = tensor<string, []>("op_886_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_886_strides_0 = const()[name = tensor<string, []>("op_886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_886_pad_0 = const()[name = tensor<string, []>("op_886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_886_dilations_0 = const()[name = tensor<string, []>("op_886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_886_groups_0 = const()[name = tensor<string, []>("op_886_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102837952)))];
+            tensor<fp16, [1280]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106114816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_886_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_886_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106117440)))];
+            tensor<fp16, [1280]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106120064)))];
+            tensor<fp16, []> var_896_to_fp16 = const()[name = tensor<string, []>("op_896_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_896_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106122688)))];
+            tensor<fp16, [5120]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119229952)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_922_pad_type_0 = const()[name = tensor<string, []>("op_922_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_922_strides_0 = const()[name = tensor<string, []>("op_922_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_922_pad_0 = const()[name = tensor<string, []>("op_922_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_922_dilations_0 = const()[name = tensor<string, []>("op_922_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_922_groups_0 = const()[name = tensor<string, []>("op_922_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119240256)))];
+            tensor<fp16, [1280]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132347520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_922_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_922_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_922_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_931 = const()[name = tensor<string, []>("op_931"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132350144)))];
+            tensor<fp16, [1280]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132352768)))];
+            tensor<fp16, []> var_947_to_fp16 = const()[name = tensor<string, []>("op_947_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_947_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_982_weight_0_to_fp16 = const()[name = tensor<string, []>("op_982_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132355392)))];
+            tensor<fp16, [1280]> var_982_bias_0_to_fp16 = const()[name = tensor<string, []>("op_982_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135632256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_982_cast_fp16 = conv(bias = var_982_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_982_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135634880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_980_pad_type_0 = const()[name = tensor<string, []>("op_980_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_980_strides_0 = const()[name = tensor<string, []>("op_980_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_980_pad_0 = const()[name = tensor<string, []>("op_980_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_980_dilations_0 = const()[name = tensor<string, []>("op_980_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_980_groups_0 = const()[name = tensor<string, []>("op_980_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138911744)))];
+            tensor<fp16, [1280]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142188608)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_980_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_980_dilations_0, groups = var_980_groups_0, pad = var_980_pad_0, pad_type = var_980_pad_type_0, strides = var_980_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<int32, [20]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_983_axis_0 = const()[name = tensor<string, []>("op_983_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_19 = split(axis = var_983_axis_0, split_sizes = tile_9, x = var_982_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<int32, [4]> var_1004_perm_0 = const()[name = tensor<string, []>("op_1004_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1005_axis_0 = const()[name = tensor<string, []>("op_1005_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1004_cast_fp16 = transpose(perm = var_1004_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_29")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_19 = split(axis = var_1005_axis_0, split_sizes = tile_10, x = var_1004_cast_fp16)[name = tensor<string, []>("op_1005_cast_fp16")];
+            tensor<int32, [20]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1026_axis_0 = const()[name = tensor<string, []>("op_1026_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_19 = split(axis = var_1026_axis_0, split_sizes = tile_11, x = var_980_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_1005_cast_fp16_0, var_983_cast_fp16_0))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_1005_cast_fp16_1, var_983_cast_fp16_1))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_1005_cast_fp16_2, var_983_cast_fp16_2))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_1005_cast_fp16_3, var_983_cast_fp16_3))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1005_cast_fp16_4, var_983_cast_fp16_4))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1005_cast_fp16_5, var_983_cast_fp16_5))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1005_cast_fp16_6, var_983_cast_fp16_6))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1005_cast_fp16_7, var_983_cast_fp16_7))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1005_cast_fp16_8, var_983_cast_fp16_8))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1005_cast_fp16_9, var_983_cast_fp16_9))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1005_cast_fp16_10, var_983_cast_fp16_10))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1005_cast_fp16_11, var_983_cast_fp16_11))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1005_cast_fp16_12, var_983_cast_fp16_12))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1005_cast_fp16_13, var_983_cast_fp16_13))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1005_cast_fp16_14, var_983_cast_fp16_14))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1005_cast_fp16_15, var_983_cast_fp16_15))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1005_cast_fp16_16, var_983_cast_fp16_16))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1005_cast_fp16_17, var_983_cast_fp16_17))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1005_cast_fp16_18, var_983_cast_fp16_18))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1005_cast_fp16_19, var_983_cast_fp16_19))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1087_cast_fp16 = softmax(axis = var_931, x = aw_121_cast_fp16)[name = tensor<string, []>("op_1087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1088_cast_fp16 = softmax(axis = var_931, x = aw_123_cast_fp16)[name = tensor<string, []>("op_1088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1089_cast_fp16 = softmax(axis = var_931, x = aw_125_cast_fp16)[name = tensor<string, []>("op_1089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1090_cast_fp16 = softmax(axis = var_931, x = aw_127_cast_fp16)[name = tensor<string, []>("op_1090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1091_cast_fp16 = softmax(axis = var_931, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1092_cast_fp16 = softmax(axis = var_931, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1093_cast_fp16 = softmax(axis = var_931, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1094_cast_fp16 = softmax(axis = var_931, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1095_cast_fp16 = softmax(axis = var_931, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1096_cast_fp16 = softmax(axis = var_931, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1097_cast_fp16 = softmax(axis = var_931, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1098_cast_fp16 = softmax(axis = var_931, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1099_cast_fp16 = softmax(axis = var_931, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1100_cast_fp16 = softmax(axis = var_931, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1101_cast_fp16 = softmax(axis = var_931, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1102_cast_fp16 = softmax(axis = var_931, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1102_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1103_cast_fp16 = softmax(axis = var_931, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1103_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1104_cast_fp16 = softmax(axis = var_931, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1104_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1105_cast_fp16 = softmax(axis = var_931, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1106_cast_fp16 = softmax(axis = var_931, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1106_cast_fp16")];
+            tensor<string, []> var_1108_equation_0 = const()[name = tensor<string, []>("op_1108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_1026_cast_fp16_0, var_1087_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<string, []> var_1110_equation_0 = const()[name = tensor<string, []>("op_1110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1110_cast_fp16 = einsum(equation = var_1110_equation_0, values = (var_1026_cast_fp16_1, var_1088_cast_fp16))[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<string, []> var_1112_equation_0 = const()[name = tensor<string, []>("op_1112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_1026_cast_fp16_2, var_1089_cast_fp16))[name = tensor<string, []>("op_1112_cast_fp16")];
+            tensor<string, []> var_1114_equation_0 = const()[name = tensor<string, []>("op_1114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1114_cast_fp16 = einsum(equation = var_1114_equation_0, values = (var_1026_cast_fp16_3, var_1090_cast_fp16))[name = tensor<string, []>("op_1114_cast_fp16")];
+            tensor<string, []> var_1116_equation_0 = const()[name = tensor<string, []>("op_1116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_1026_cast_fp16_4, var_1091_cast_fp16))[name = tensor<string, []>("op_1116_cast_fp16")];
+            tensor<string, []> var_1118_equation_0 = const()[name = tensor<string, []>("op_1118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1118_cast_fp16 = einsum(equation = var_1118_equation_0, values = (var_1026_cast_fp16_5, var_1092_cast_fp16))[name = tensor<string, []>("op_1118_cast_fp16")];
+            tensor<string, []> var_1120_equation_0 = const()[name = tensor<string, []>("op_1120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_1026_cast_fp16_6, var_1093_cast_fp16))[name = tensor<string, []>("op_1120_cast_fp16")];
+            tensor<string, []> var_1122_equation_0 = const()[name = tensor<string, []>("op_1122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1122_cast_fp16 = einsum(equation = var_1122_equation_0, values = (var_1026_cast_fp16_7, var_1094_cast_fp16))[name = tensor<string, []>("op_1122_cast_fp16")];
+            tensor<string, []> var_1124_equation_0 = const()[name = tensor<string, []>("op_1124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_1026_cast_fp16_8, var_1095_cast_fp16))[name = tensor<string, []>("op_1124_cast_fp16")];
+            tensor<string, []> var_1126_equation_0 = const()[name = tensor<string, []>("op_1126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1126_cast_fp16 = einsum(equation = var_1126_equation_0, values = (var_1026_cast_fp16_9, var_1096_cast_fp16))[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<string, []> var_1128_equation_0 = const()[name = tensor<string, []>("op_1128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1128_cast_fp16 = einsum(equation = var_1128_equation_0, values = (var_1026_cast_fp16_10, var_1097_cast_fp16))[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<string, []> var_1130_equation_0 = const()[name = tensor<string, []>("op_1130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1130_cast_fp16 = einsum(equation = var_1130_equation_0, values = (var_1026_cast_fp16_11, var_1098_cast_fp16))[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<string, []> var_1132_equation_0 = const()[name = tensor<string, []>("op_1132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1132_cast_fp16 = einsum(equation = var_1132_equation_0, values = (var_1026_cast_fp16_12, var_1099_cast_fp16))[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<string, []> var_1134_equation_0 = const()[name = tensor<string, []>("op_1134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1134_cast_fp16 = einsum(equation = var_1134_equation_0, values = (var_1026_cast_fp16_13, var_1100_cast_fp16))[name = tensor<string, []>("op_1134_cast_fp16")];
+            tensor<string, []> var_1136_equation_0 = const()[name = tensor<string, []>("op_1136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1136_cast_fp16 = einsum(equation = var_1136_equation_0, values = (var_1026_cast_fp16_14, var_1101_cast_fp16))[name = tensor<string, []>("op_1136_cast_fp16")];
+            tensor<string, []> var_1138_equation_0 = const()[name = tensor<string, []>("op_1138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1138_cast_fp16 = einsum(equation = var_1138_equation_0, values = (var_1026_cast_fp16_15, var_1102_cast_fp16))[name = tensor<string, []>("op_1138_cast_fp16")];
+            tensor<string, []> var_1140_equation_0 = const()[name = tensor<string, []>("op_1140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1140_cast_fp16 = einsum(equation = var_1140_equation_0, values = (var_1026_cast_fp16_16, var_1103_cast_fp16))[name = tensor<string, []>("op_1140_cast_fp16")];
+            tensor<string, []> var_1142_equation_0 = const()[name = tensor<string, []>("op_1142_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1142_cast_fp16 = einsum(equation = var_1142_equation_0, values = (var_1026_cast_fp16_17, var_1104_cast_fp16))[name = tensor<string, []>("op_1142_cast_fp16")];
+            tensor<string, []> var_1144_equation_0 = const()[name = tensor<string, []>("op_1144_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1144_cast_fp16 = einsum(equation = var_1144_equation_0, values = (var_1026_cast_fp16_18, var_1105_cast_fp16))[name = tensor<string, []>("op_1144_cast_fp16")];
+            tensor<string, []> var_1146_equation_0 = const()[name = tensor<string, []>("op_1146_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1146_cast_fp16 = einsum(equation = var_1146_equation_0, values = (var_1026_cast_fp16_19, var_1106_cast_fp16))[name = tensor<string, []>("op_1146_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_35_cast_fp16 = concat(axis = var_931, interleave = input_35_interleave_0, values = (var_1108_cast_fp16, var_1110_cast_fp16, var_1112_cast_fp16, var_1114_cast_fp16, var_1116_cast_fp16, var_1118_cast_fp16, var_1120_cast_fp16, var_1122_cast_fp16, var_1124_cast_fp16, var_1126_cast_fp16, var_1128_cast_fp16, var_1130_cast_fp16, var_1132_cast_fp16, var_1134_cast_fp16, var_1136_cast_fp16, var_1138_cast_fp16, var_1140_cast_fp16, var_1142_cast_fp16, var_1144_cast_fp16, var_1146_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_1155_pad_type_0 = const()[name = tensor<string, []>("op_1155_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1155_strides_0 = const()[name = tensor<string, []>("op_1155_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1155_pad_0 = const()[name = tensor<string, []>("op_1155_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1155_dilations_0 = const()[name = tensor<string, []>("op_1155_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1155_groups_0 = const()[name = tensor<string, []>("op_1155_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142191232)))];
+            tensor<fp16, [1280]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145468096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1155_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_1155_dilations_0, groups = var_1155_groups_0, pad = var_1155_pad_0, pad_type = var_1155_pad_type_0, strides = var_1155_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_1155_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_1155_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145470720)))];
+            tensor<fp16, [1280]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145473344)))];
+            tensor<fp16, []> var_1165_to_fp16 = const()[name = tensor<string, []>("op_1165_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_1165_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145475968)))];
+            tensor<fp16, [5120]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158583232)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_1191_pad_type_0 = const()[name = tensor<string, []>("op_1191_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1191_strides_0 = const()[name = tensor<string, []>("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1191_pad_0 = const()[name = tensor<string, []>("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1191_dilations_0 = const()[name = tensor<string, []>("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1191_groups_0 = const()[name = tensor<string, []>("op_1191_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158593536)))];
+            tensor<fp16, [1280]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171700800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1191_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_1191_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_1191_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_1200 = const()[name = tensor<string, []>("op_1200"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171703424)))];
+            tensor<fp16, [1280]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171706048)))];
+            tensor<fp16, []> var_1216_to_fp16 = const()[name = tensor<string, []>("op_1216_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_1216_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1251_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1251_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171708672)))];
+            tensor<fp16, [1280]> var_1251_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1251_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174985536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1251_cast_fp16 = conv(bias = var_1251_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_1251_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1251_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(174988160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_1249_pad_type_0 = const()[name = tensor<string, []>("op_1249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1249_strides_0 = const()[name = tensor<string, []>("op_1249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1249_pad_0 = const()[name = tensor<string, []>("op_1249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1249_dilations_0 = const()[name = tensor<string, []>("op_1249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1249_groups_0 = const()[name = tensor<string, []>("op_1249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(178265024)))];
+            tensor<fp16, [1280]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181541888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1249_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_1249_dilations_0, groups = var_1249_groups_0, pad = var_1249_pad_0, pad_type = var_1249_pad_type_0, strides = var_1249_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1249_cast_fp16")];
+            tensor<int32, [20]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1252_axis_0 = const()[name = tensor<string, []>("op_1252_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_19 = split(axis = var_1252_axis_0, split_sizes = tile_12, x = var_1251_cast_fp16)[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<int32, [4]> var_1273_perm_0 = const()[name = tensor<string, []>("op_1273_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1274_axis_0 = const()[name = tensor<string, []>("op_1274_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1273_cast_fp16 = transpose(perm = var_1273_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_28")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_19 = split(axis = var_1274_axis_0, split_sizes = tile_13, x = var_1273_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
+            tensor<int32, [20]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1295_axis_0 = const()[name = tensor<string, []>("op_1295_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_19 = split(axis = var_1295_axis_0, split_sizes = tile_14, x = var_1249_cast_fp16)[name = tensor<string, []>("op_1295_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1274_cast_fp16_0, var_1252_cast_fp16_0))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1274_cast_fp16_1, var_1252_cast_fp16_1))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1274_cast_fp16_2, var_1252_cast_fp16_2))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1274_cast_fp16_3, var_1252_cast_fp16_3))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1274_cast_fp16_4, var_1252_cast_fp16_4))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1274_cast_fp16_5, var_1252_cast_fp16_5))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1274_cast_fp16_6, var_1252_cast_fp16_6))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1274_cast_fp16_7, var_1252_cast_fp16_7))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1274_cast_fp16_8, var_1252_cast_fp16_8))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1274_cast_fp16_9, var_1252_cast_fp16_9))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1274_cast_fp16_10, var_1252_cast_fp16_10))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1274_cast_fp16_11, var_1252_cast_fp16_11))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1274_cast_fp16_12, var_1252_cast_fp16_12))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1274_cast_fp16_13, var_1252_cast_fp16_13))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1274_cast_fp16_14, var_1252_cast_fp16_14))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1274_cast_fp16_15, var_1252_cast_fp16_15))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1274_cast_fp16_16, var_1252_cast_fp16_16))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1274_cast_fp16_17, var_1252_cast_fp16_17))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1274_cast_fp16_18, var_1252_cast_fp16_18))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1274_cast_fp16_19, var_1252_cast_fp16_19))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1356_cast_fp16 = softmax(axis = var_1200, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1357_cast_fp16 = softmax(axis = var_1200, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1358_cast_fp16 = softmax(axis = var_1200, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1359_cast_fp16 = softmax(axis = var_1200, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1360_cast_fp16 = softmax(axis = var_1200, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1361_cast_fp16 = softmax(axis = var_1200, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1362_cast_fp16 = softmax(axis = var_1200, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1363_cast_fp16 = softmax(axis = var_1200, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1364_cast_fp16 = softmax(axis = var_1200, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1365_cast_fp16 = softmax(axis = var_1200, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1366_cast_fp16 = softmax(axis = var_1200, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1367_cast_fp16 = softmax(axis = var_1200, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1368_cast_fp16 = softmax(axis = var_1200, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1369_cast_fp16 = softmax(axis = var_1200, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1369_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1370_cast_fp16 = softmax(axis = var_1200, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1370_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1371_cast_fp16 = softmax(axis = var_1200, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1371_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1372_cast_fp16 = softmax(axis = var_1200, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1372_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1373_cast_fp16 = softmax(axis = var_1200, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1373_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1374_cast_fp16 = softmax(axis = var_1200, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1374_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1375_cast_fp16 = softmax(axis = var_1200, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1375_cast_fp16")];
+            tensor<string, []> var_1377_equation_0 = const()[name = tensor<string, []>("op_1377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1377_cast_fp16 = einsum(equation = var_1377_equation_0, values = (var_1295_cast_fp16_0, var_1356_cast_fp16))[name = tensor<string, []>("op_1377_cast_fp16")];
+            tensor<string, []> var_1379_equation_0 = const()[name = tensor<string, []>("op_1379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1379_cast_fp16 = einsum(equation = var_1379_equation_0, values = (var_1295_cast_fp16_1, var_1357_cast_fp16))[name = tensor<string, []>("op_1379_cast_fp16")];
+            tensor<string, []> var_1381_equation_0 = const()[name = tensor<string, []>("op_1381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1381_cast_fp16 = einsum(equation = var_1381_equation_0, values = (var_1295_cast_fp16_2, var_1358_cast_fp16))[name = tensor<string, []>("op_1381_cast_fp16")];
+            tensor<string, []> var_1383_equation_0 = const()[name = tensor<string, []>("op_1383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1383_cast_fp16 = einsum(equation = var_1383_equation_0, values = (var_1295_cast_fp16_3, var_1359_cast_fp16))[name = tensor<string, []>("op_1383_cast_fp16")];
+            tensor<string, []> var_1385_equation_0 = const()[name = tensor<string, []>("op_1385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1385_cast_fp16 = einsum(equation = var_1385_equation_0, values = (var_1295_cast_fp16_4, var_1360_cast_fp16))[name = tensor<string, []>("op_1385_cast_fp16")];
+            tensor<string, []> var_1387_equation_0 = const()[name = tensor<string, []>("op_1387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1387_cast_fp16 = einsum(equation = var_1387_equation_0, values = (var_1295_cast_fp16_5, var_1361_cast_fp16))[name = tensor<string, []>("op_1387_cast_fp16")];
+            tensor<string, []> var_1389_equation_0 = const()[name = tensor<string, []>("op_1389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1389_cast_fp16 = einsum(equation = var_1389_equation_0, values = (var_1295_cast_fp16_6, var_1362_cast_fp16))[name = tensor<string, []>("op_1389_cast_fp16")];
+            tensor<string, []> var_1391_equation_0 = const()[name = tensor<string, []>("op_1391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1391_cast_fp16 = einsum(equation = var_1391_equation_0, values = (var_1295_cast_fp16_7, var_1363_cast_fp16))[name = tensor<string, []>("op_1391_cast_fp16")];
+            tensor<string, []> var_1393_equation_0 = const()[name = tensor<string, []>("op_1393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16 = einsum(equation = var_1393_equation_0, values = (var_1295_cast_fp16_8, var_1364_cast_fp16))[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<string, []> var_1395_equation_0 = const()[name = tensor<string, []>("op_1395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1395_cast_fp16 = einsum(equation = var_1395_equation_0, values = (var_1295_cast_fp16_9, var_1365_cast_fp16))[name = tensor<string, []>("op_1395_cast_fp16")];
+            tensor<string, []> var_1397_equation_0 = const()[name = tensor<string, []>("op_1397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1397_cast_fp16 = einsum(equation = var_1397_equation_0, values = (var_1295_cast_fp16_10, var_1366_cast_fp16))[name = tensor<string, []>("op_1397_cast_fp16")];
+            tensor<string, []> var_1399_equation_0 = const()[name = tensor<string, []>("op_1399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1399_cast_fp16 = einsum(equation = var_1399_equation_0, values = (var_1295_cast_fp16_11, var_1367_cast_fp16))[name = tensor<string, []>("op_1399_cast_fp16")];
+            tensor<string, []> var_1401_equation_0 = const()[name = tensor<string, []>("op_1401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1401_cast_fp16 = einsum(equation = var_1401_equation_0, values = (var_1295_cast_fp16_12, var_1368_cast_fp16))[name = tensor<string, []>("op_1401_cast_fp16")];
+            tensor<string, []> var_1403_equation_0 = const()[name = tensor<string, []>("op_1403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1403_cast_fp16 = einsum(equation = var_1403_equation_0, values = (var_1295_cast_fp16_13, var_1369_cast_fp16))[name = tensor<string, []>("op_1403_cast_fp16")];
+            tensor<string, []> var_1405_equation_0 = const()[name = tensor<string, []>("op_1405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1405_cast_fp16 = einsum(equation = var_1405_equation_0, values = (var_1295_cast_fp16_14, var_1370_cast_fp16))[name = tensor<string, []>("op_1405_cast_fp16")];
+            tensor<string, []> var_1407_equation_0 = const()[name = tensor<string, []>("op_1407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1407_cast_fp16 = einsum(equation = var_1407_equation_0, values = (var_1295_cast_fp16_15, var_1371_cast_fp16))[name = tensor<string, []>("op_1407_cast_fp16")];
+            tensor<string, []> var_1409_equation_0 = const()[name = tensor<string, []>("op_1409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1409_cast_fp16 = einsum(equation = var_1409_equation_0, values = (var_1295_cast_fp16_16, var_1372_cast_fp16))[name = tensor<string, []>("op_1409_cast_fp16")];
+            tensor<string, []> var_1411_equation_0 = const()[name = tensor<string, []>("op_1411_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1411_cast_fp16 = einsum(equation = var_1411_equation_0, values = (var_1295_cast_fp16_17, var_1373_cast_fp16))[name = tensor<string, []>("op_1411_cast_fp16")];
+            tensor<string, []> var_1413_equation_0 = const()[name = tensor<string, []>("op_1413_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1413_cast_fp16 = einsum(equation = var_1413_equation_0, values = (var_1295_cast_fp16_18, var_1374_cast_fp16))[name = tensor<string, []>("op_1413_cast_fp16")];
+            tensor<string, []> var_1415_equation_0 = const()[name = tensor<string, []>("op_1415_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1415_cast_fp16 = einsum(equation = var_1415_equation_0, values = (var_1295_cast_fp16_19, var_1375_cast_fp16))[name = tensor<string, []>("op_1415_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_45_cast_fp16 = concat(axis = var_1200, interleave = input_45_interleave_0, values = (var_1377_cast_fp16, var_1379_cast_fp16, var_1381_cast_fp16, var_1383_cast_fp16, var_1385_cast_fp16, var_1387_cast_fp16, var_1389_cast_fp16, var_1391_cast_fp16, var_1393_cast_fp16, var_1395_cast_fp16, var_1397_cast_fp16, var_1399_cast_fp16, var_1401_cast_fp16, var_1403_cast_fp16, var_1405_cast_fp16, var_1407_cast_fp16, var_1409_cast_fp16, var_1411_cast_fp16, var_1413_cast_fp16, var_1415_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1424_pad_type_0 = const()[name = tensor<string, []>("op_1424_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1424_strides_0 = const()[name = tensor<string, []>("op_1424_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1424_pad_0 = const()[name = tensor<string, []>("op_1424_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1424_dilations_0 = const()[name = tensor<string, []>("op_1424_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1424_groups_0 = const()[name = tensor<string, []>("op_1424_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181544512)))];
+            tensor<fp16, [1280]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184821376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1424_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1424_dilations_0, groups = var_1424_groups_0, pad = var_1424_pad_0, pad_type = var_1424_pad_type_0, strides = var_1424_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1424_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1424_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184824000)))];
+            tensor<fp16, [1280]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184826624)))];
+            tensor<fp16, []> var_1434_to_fp16 = const()[name = tensor<string, []>("op_1434_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1434_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184829248)))];
+            tensor<fp16, [5120]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197936512)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1460_pad_type_0 = const()[name = tensor<string, []>("op_1460_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1460_strides_0 = const()[name = tensor<string, []>("op_1460_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1460_pad_0 = const()[name = tensor<string, []>("op_1460_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1460_dilations_0 = const()[name = tensor<string, []>("op_1460_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1460_groups_0 = const()[name = tensor<string, []>("op_1460_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197946816)))];
+            tensor<fp16, [1280]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211054080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1460_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1460_dilations_0, groups = var_1460_groups_0, pad = var_1460_pad_0, pad_type = var_1460_pad_type_0, strides = var_1460_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1460_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1469 = const()[name = tensor<string, []>("op_1469"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211056704)))];
+            tensor<fp16, [1280]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211059328)))];
+            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1485_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1520_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1520_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211061952)))];
+            tensor<fp16, [1280]> var_1520_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1520_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214338816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1520_cast_fp16 = conv(bias = var_1520_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1520_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1520_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214341440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1518_pad_type_0 = const()[name = tensor<string, []>("op_1518_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1518_strides_0 = const()[name = tensor<string, []>("op_1518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1518_pad_0 = const()[name = tensor<string, []>("op_1518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1518_dilations_0 = const()[name = tensor<string, []>("op_1518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1518_groups_0 = const()[name = tensor<string, []>("op_1518_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217618304)))];
+            tensor<fp16, [1280]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220895168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1518_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1518_dilations_0, groups = var_1518_groups_0, pad = var_1518_pad_0, pad_type = var_1518_pad_type_0, strides = var_1518_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1518_cast_fp16")];
+            tensor<int32, [20]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1521_axis_0 = const()[name = tensor<string, []>("op_1521_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_19 = split(axis = var_1521_axis_0, split_sizes = tile_15, x = var_1520_cast_fp16)[name = tensor<string, []>("op_1521_cast_fp16")];
+            tensor<int32, [4]> var_1542_perm_0 = const()[name = tensor<string, []>("op_1542_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1543_axis_0 = const()[name = tensor<string, []>("op_1543_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1542_cast_fp16 = transpose(perm = var_1542_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_27")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_19 = split(axis = var_1543_axis_0, split_sizes = tile_16, x = var_1542_cast_fp16)[name = tensor<string, []>("op_1543_cast_fp16")];
+            tensor<int32, [20]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1564_axis_0 = const()[name = tensor<string, []>("op_1564_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_19 = split(axis = var_1564_axis_0, split_sizes = tile_17, x = var_1518_cast_fp16)[name = tensor<string, []>("op_1564_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1543_cast_fp16_0, var_1521_cast_fp16_0))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1543_cast_fp16_1, var_1521_cast_fp16_1))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1543_cast_fp16_2, var_1521_cast_fp16_2))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1543_cast_fp16_3, var_1521_cast_fp16_3))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1543_cast_fp16_4, var_1521_cast_fp16_4))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1543_cast_fp16_5, var_1521_cast_fp16_5))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1543_cast_fp16_6, var_1521_cast_fp16_6))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1543_cast_fp16_7, var_1521_cast_fp16_7))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1543_cast_fp16_8, var_1521_cast_fp16_8))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1543_cast_fp16_9, var_1521_cast_fp16_9))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1543_cast_fp16_10, var_1521_cast_fp16_10))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1543_cast_fp16_11, var_1521_cast_fp16_11))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1543_cast_fp16_12, var_1521_cast_fp16_12))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1543_cast_fp16_13, var_1521_cast_fp16_13))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1543_cast_fp16_14, var_1521_cast_fp16_14))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1543_cast_fp16_15, var_1521_cast_fp16_15))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1543_cast_fp16_16, var_1521_cast_fp16_16))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1543_cast_fp16_17, var_1521_cast_fp16_17))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1543_cast_fp16_18, var_1521_cast_fp16_18))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1543_cast_fp16_19, var_1521_cast_fp16_19))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1625_cast_fp16 = softmax(axis = var_1469, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1626_cast_fp16 = softmax(axis = var_1469, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1627_cast_fp16 = softmax(axis = var_1469, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1628_cast_fp16 = softmax(axis = var_1469, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1629_cast_fp16 = softmax(axis = var_1469, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1630_cast_fp16 = softmax(axis = var_1469, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1631_cast_fp16 = softmax(axis = var_1469, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1632_cast_fp16 = softmax(axis = var_1469, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1633_cast_fp16 = softmax(axis = var_1469, x = aw_217_cast_fp16)[name = tensor<string, []>("op_1633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1634_cast_fp16 = softmax(axis = var_1469, x = aw_219_cast_fp16)[name = tensor<string, []>("op_1634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1635_cast_fp16 = softmax(axis = var_1469, x = aw_221_cast_fp16)[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1636_cast_fp16 = softmax(axis = var_1469, x = aw_223_cast_fp16)[name = tensor<string, []>("op_1636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1637_cast_fp16 = softmax(axis = var_1469, x = aw_225_cast_fp16)[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1638_cast_fp16 = softmax(axis = var_1469, x = aw_227_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1639_cast_fp16 = softmax(axis = var_1469, x = aw_229_cast_fp16)[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1640_cast_fp16 = softmax(axis = var_1469, x = aw_231_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1641_cast_fp16 = softmax(axis = var_1469, x = aw_233_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1642_cast_fp16 = softmax(axis = var_1469, x = aw_235_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1643_cast_fp16 = softmax(axis = var_1469, x = aw_237_cast_fp16)[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1644_cast_fp16 = softmax(axis = var_1469, x = aw_239_cast_fp16)[name = tensor<string, []>("op_1644_cast_fp16")];
+            tensor<string, []> var_1646_equation_0 = const()[name = tensor<string, []>("op_1646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1564_cast_fp16_0, var_1625_cast_fp16))[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1564_cast_fp16_1, var_1626_cast_fp16))[name = tensor<string, []>("op_1648_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1564_cast_fp16_2, var_1627_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1564_cast_fp16_3, var_1628_cast_fp16))[name = tensor<string, []>("op_1652_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1564_cast_fp16_4, var_1629_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1564_cast_fp16_5, var_1630_cast_fp16))[name = tensor<string, []>("op_1656_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1564_cast_fp16_6, var_1631_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1564_cast_fp16_7, var_1632_cast_fp16))[name = tensor<string, []>("op_1660_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1564_cast_fp16_8, var_1633_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1564_cast_fp16_9, var_1634_cast_fp16))[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1564_cast_fp16_10, var_1635_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1564_cast_fp16_11, var_1636_cast_fp16))[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1564_cast_fp16_12, var_1637_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<string, []> var_1672_equation_0 = const()[name = tensor<string, []>("op_1672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1672_cast_fp16 = einsum(equation = var_1672_equation_0, values = (var_1564_cast_fp16_13, var_1638_cast_fp16))[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<string, []> var_1674_equation_0 = const()[name = tensor<string, []>("op_1674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1564_cast_fp16_14, var_1639_cast_fp16))[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<string, []> var_1676_equation_0 = const()[name = tensor<string, []>("op_1676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1676_cast_fp16 = einsum(equation = var_1676_equation_0, values = (var_1564_cast_fp16_15, var_1640_cast_fp16))[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<string, []> var_1678_equation_0 = const()[name = tensor<string, []>("op_1678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1564_cast_fp16_16, var_1641_cast_fp16))[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<string, []> var_1680_equation_0 = const()[name = tensor<string, []>("op_1680_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1680_cast_fp16 = einsum(equation = var_1680_equation_0, values = (var_1564_cast_fp16_17, var_1642_cast_fp16))[name = tensor<string, []>("op_1680_cast_fp16")];
+            tensor<string, []> var_1682_equation_0 = const()[name = tensor<string, []>("op_1682_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1564_cast_fp16_18, var_1643_cast_fp16))[name = tensor<string, []>("op_1682_cast_fp16")];
+            tensor<string, []> var_1684_equation_0 = const()[name = tensor<string, []>("op_1684_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1684_cast_fp16 = einsum(equation = var_1684_equation_0, values = (var_1564_cast_fp16_19, var_1644_cast_fp16))[name = tensor<string, []>("op_1684_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1469, interleave = input_55_interleave_0, values = (var_1646_cast_fp16, var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16, var_1668_cast_fp16, var_1670_cast_fp16, var_1672_cast_fp16, var_1674_cast_fp16, var_1676_cast_fp16, var_1678_cast_fp16, var_1680_cast_fp16, var_1682_cast_fp16, var_1684_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1693_pad_type_0 = const()[name = tensor<string, []>("op_1693_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1693_strides_0 = const()[name = tensor<string, []>("op_1693_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1693_pad_0 = const()[name = tensor<string, []>("op_1693_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1693_dilations_0 = const()[name = tensor<string, []>("op_1693_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1693_groups_0 = const()[name = tensor<string, []>("op_1693_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(220897792)))];
+            tensor<fp16, [1280]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224174656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1693_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1693_dilations_0, groups = var_1693_groups_0, pad = var_1693_pad_0, pad_type = var_1693_pad_type_0, strides = var_1693_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1693_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1693_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224177280)))];
+            tensor<fp16, [1280]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224179904)))];
+            tensor<fp16, []> var_1703_to_fp16 = const()[name = tensor<string, []>("op_1703_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1703_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224182528)))];
+            tensor<fp16, [5120]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237289792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1729_pad_type_0 = const()[name = tensor<string, []>("op_1729_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1729_strides_0 = const()[name = tensor<string, []>("op_1729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1729_pad_0 = const()[name = tensor<string, []>("op_1729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1729_dilations_0 = const()[name = tensor<string, []>("op_1729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1729_groups_0 = const()[name = tensor<string, []>("op_1729_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237300096)))];
+            tensor<fp16, [1280]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250407360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1729_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1729_dilations_0, groups = var_1729_groups_0, pad = var_1729_pad_0, pad_type = var_1729_pad_type_0, strides = var_1729_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1729_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1729_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1738 = const()[name = tensor<string, []>("op_1738"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250409984)))];
+            tensor<fp16, [1280]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250412608)))];
+            tensor<fp16, []> var_1754_to_fp16 = const()[name = tensor<string, []>("op_1754_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1754_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1789_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1789_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250415232)))];
+            tensor<fp16, [1280]> var_1789_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1789_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253692096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1789_cast_fp16 = conv(bias = var_1789_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1789_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1789_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253694720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1787_pad_type_0 = const()[name = tensor<string, []>("op_1787_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1787_strides_0 = const()[name = tensor<string, []>("op_1787_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1787_pad_0 = const()[name = tensor<string, []>("op_1787_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1787_dilations_0 = const()[name = tensor<string, []>("op_1787_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1787_groups_0 = const()[name = tensor<string, []>("op_1787_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(256971584)))];
+            tensor<fp16, [1280]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260248448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1787_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1787_dilations_0, groups = var_1787_groups_0, pad = var_1787_pad_0, pad_type = var_1787_pad_type_0, strides = var_1787_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1787_cast_fp16")];
+            tensor<int32, [20]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1790_axis_0 = const()[name = tensor<string, []>("op_1790_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_19 = split(axis = var_1790_axis_0, split_sizes = tile_18, x = var_1789_cast_fp16)[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<int32, [4]> var_1811_perm_0 = const()[name = tensor<string, []>("op_1811_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1812_axis_0 = const()[name = tensor<string, []>("op_1812_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1811_cast_fp16 = transpose(perm = var_1811_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_26")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_19 = split(axis = var_1812_axis_0, split_sizes = tile_19, x = var_1811_cast_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<int32, [20]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1833_axis_0 = const()[name = tensor<string, []>("op_1833_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_19 = split(axis = var_1833_axis_0, split_sizes = tile_20, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1833_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_1812_cast_fp16_0, var_1790_cast_fp16_0))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_1812_cast_fp16_1, var_1790_cast_fp16_1))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_1812_cast_fp16_2, var_1790_cast_fp16_2))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_1812_cast_fp16_3, var_1790_cast_fp16_3))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_1812_cast_fp16_4, var_1790_cast_fp16_4))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_1812_cast_fp16_5, var_1790_cast_fp16_5))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_1812_cast_fp16_6, var_1790_cast_fp16_6))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_1812_cast_fp16_7, var_1790_cast_fp16_7))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_1812_cast_fp16_8, var_1790_cast_fp16_8))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_1812_cast_fp16_9, var_1790_cast_fp16_9))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_1812_cast_fp16_10, var_1790_cast_fp16_10))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_1812_cast_fp16_11, var_1790_cast_fp16_11))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_1812_cast_fp16_12, var_1790_cast_fp16_12))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_1812_cast_fp16_13, var_1790_cast_fp16_13))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_1812_cast_fp16_14, var_1790_cast_fp16_14))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_1812_cast_fp16_15, var_1790_cast_fp16_15))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_1812_cast_fp16_16, var_1790_cast_fp16_16))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_1812_cast_fp16_17, var_1790_cast_fp16_17))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_1812_cast_fp16_18, var_1790_cast_fp16_18))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_1812_cast_fp16_19, var_1790_cast_fp16_19))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1894_cast_fp16 = softmax(axis = var_1738, x = aw_241_cast_fp16)[name = tensor<string, []>("op_1894_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1895_cast_fp16 = softmax(axis = var_1738, x = aw_243_cast_fp16)[name = tensor<string, []>("op_1895_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1896_cast_fp16 = softmax(axis = var_1738, x = aw_245_cast_fp16)[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1897_cast_fp16 = softmax(axis = var_1738, x = aw_247_cast_fp16)[name = tensor<string, []>("op_1897_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1898_cast_fp16 = softmax(axis = var_1738, x = aw_249_cast_fp16)[name = tensor<string, []>("op_1898_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1899_cast_fp16 = softmax(axis = var_1738, x = aw_251_cast_fp16)[name = tensor<string, []>("op_1899_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1900_cast_fp16 = softmax(axis = var_1738, x = aw_253_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1901_cast_fp16 = softmax(axis = var_1738, x = aw_255_cast_fp16)[name = tensor<string, []>("op_1901_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1902_cast_fp16 = softmax(axis = var_1738, x = aw_257_cast_fp16)[name = tensor<string, []>("op_1902_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1903_cast_fp16 = softmax(axis = var_1738, x = aw_259_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1904_cast_fp16 = softmax(axis = var_1738, x = aw_261_cast_fp16)[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1905_cast_fp16 = softmax(axis = var_1738, x = aw_263_cast_fp16)[name = tensor<string, []>("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1906_cast_fp16 = softmax(axis = var_1738, x = aw_265_cast_fp16)[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1907_cast_fp16 = softmax(axis = var_1738, x = aw_267_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1908_cast_fp16 = softmax(axis = var_1738, x = aw_269_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1909_cast_fp16 = softmax(axis = var_1738, x = aw_271_cast_fp16)[name = tensor<string, []>("op_1909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1910_cast_fp16 = softmax(axis = var_1738, x = aw_273_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1911_cast_fp16 = softmax(axis = var_1738, x = aw_275_cast_fp16)[name = tensor<string, []>("op_1911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1912_cast_fp16 = softmax(axis = var_1738, x = aw_277_cast_fp16)[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1913_cast_fp16 = softmax(axis = var_1738, x = aw_279_cast_fp16)[name = tensor<string, []>("op_1913_cast_fp16")];
+            tensor<string, []> var_1915_equation_0 = const()[name = tensor<string, []>("op_1915_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1915_cast_fp16 = einsum(equation = var_1915_equation_0, values = (var_1833_cast_fp16_0, var_1894_cast_fp16))[name = tensor<string, []>("op_1915_cast_fp16")];
+            tensor<string, []> var_1917_equation_0 = const()[name = tensor<string, []>("op_1917_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1917_cast_fp16 = einsum(equation = var_1917_equation_0, values = (var_1833_cast_fp16_1, var_1895_cast_fp16))[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<string, []> var_1919_equation_0 = const()[name = tensor<string, []>("op_1919_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1919_cast_fp16 = einsum(equation = var_1919_equation_0, values = (var_1833_cast_fp16_2, var_1896_cast_fp16))[name = tensor<string, []>("op_1919_cast_fp16")];
+            tensor<string, []> var_1921_equation_0 = const()[name = tensor<string, []>("op_1921_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1921_cast_fp16 = einsum(equation = var_1921_equation_0, values = (var_1833_cast_fp16_3, var_1897_cast_fp16))[name = tensor<string, []>("op_1921_cast_fp16")];
+            tensor<string, []> var_1923_equation_0 = const()[name = tensor<string, []>("op_1923_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1923_cast_fp16 = einsum(equation = var_1923_equation_0, values = (var_1833_cast_fp16_4, var_1898_cast_fp16))[name = tensor<string, []>("op_1923_cast_fp16")];
+            tensor<string, []> var_1925_equation_0 = const()[name = tensor<string, []>("op_1925_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1925_cast_fp16 = einsum(equation = var_1925_equation_0, values = (var_1833_cast_fp16_5, var_1899_cast_fp16))[name = tensor<string, []>("op_1925_cast_fp16")];
+            tensor<string, []> var_1927_equation_0 = const()[name = tensor<string, []>("op_1927_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1927_cast_fp16 = einsum(equation = var_1927_equation_0, values = (var_1833_cast_fp16_6, var_1900_cast_fp16))[name = tensor<string, []>("op_1927_cast_fp16")];
+            tensor<string, []> var_1929_equation_0 = const()[name = tensor<string, []>("op_1929_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1929_cast_fp16 = einsum(equation = var_1929_equation_0, values = (var_1833_cast_fp16_7, var_1901_cast_fp16))[name = tensor<string, []>("op_1929_cast_fp16")];
+            tensor<string, []> var_1931_equation_0 = const()[name = tensor<string, []>("op_1931_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1931_cast_fp16 = einsum(equation = var_1931_equation_0, values = (var_1833_cast_fp16_8, var_1902_cast_fp16))[name = tensor<string, []>("op_1931_cast_fp16")];
+            tensor<string, []> var_1933_equation_0 = const()[name = tensor<string, []>("op_1933_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1933_cast_fp16 = einsum(equation = var_1933_equation_0, values = (var_1833_cast_fp16_9, var_1903_cast_fp16))[name = tensor<string, []>("op_1933_cast_fp16")];
+            tensor<string, []> var_1935_equation_0 = const()[name = tensor<string, []>("op_1935_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1935_cast_fp16 = einsum(equation = var_1935_equation_0, values = (var_1833_cast_fp16_10, var_1904_cast_fp16))[name = tensor<string, []>("op_1935_cast_fp16")];
+            tensor<string, []> var_1937_equation_0 = const()[name = tensor<string, []>("op_1937_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1937_cast_fp16 = einsum(equation = var_1937_equation_0, values = (var_1833_cast_fp16_11, var_1905_cast_fp16))[name = tensor<string, []>("op_1937_cast_fp16")];
+            tensor<string, []> var_1939_equation_0 = const()[name = tensor<string, []>("op_1939_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1939_cast_fp16 = einsum(equation = var_1939_equation_0, values = (var_1833_cast_fp16_12, var_1906_cast_fp16))[name = tensor<string, []>("op_1939_cast_fp16")];
+            tensor<string, []> var_1941_equation_0 = const()[name = tensor<string, []>("op_1941_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1941_cast_fp16 = einsum(equation = var_1941_equation_0, values = (var_1833_cast_fp16_13, var_1907_cast_fp16))[name = tensor<string, []>("op_1941_cast_fp16")];
+            tensor<string, []> var_1943_equation_0 = const()[name = tensor<string, []>("op_1943_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1943_cast_fp16 = einsum(equation = var_1943_equation_0, values = (var_1833_cast_fp16_14, var_1908_cast_fp16))[name = tensor<string, []>("op_1943_cast_fp16")];
+            tensor<string, []> var_1945_equation_0 = const()[name = tensor<string, []>("op_1945_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1945_cast_fp16 = einsum(equation = var_1945_equation_0, values = (var_1833_cast_fp16_15, var_1909_cast_fp16))[name = tensor<string, []>("op_1945_cast_fp16")];
+            tensor<string, []> var_1947_equation_0 = const()[name = tensor<string, []>("op_1947_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1947_cast_fp16 = einsum(equation = var_1947_equation_0, values = (var_1833_cast_fp16_16, var_1910_cast_fp16))[name = tensor<string, []>("op_1947_cast_fp16")];
+            tensor<string, []> var_1949_equation_0 = const()[name = tensor<string, []>("op_1949_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1949_cast_fp16 = einsum(equation = var_1949_equation_0, values = (var_1833_cast_fp16_17, var_1911_cast_fp16))[name = tensor<string, []>("op_1949_cast_fp16")];
+            tensor<string, []> var_1951_equation_0 = const()[name = tensor<string, []>("op_1951_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1951_cast_fp16 = einsum(equation = var_1951_equation_0, values = (var_1833_cast_fp16_18, var_1912_cast_fp16))[name = tensor<string, []>("op_1951_cast_fp16")];
+            tensor<string, []> var_1953_equation_0 = const()[name = tensor<string, []>("op_1953_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1953_cast_fp16 = einsum(equation = var_1953_equation_0, values = (var_1833_cast_fp16_19, var_1913_cast_fp16))[name = tensor<string, []>("op_1953_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1738, interleave = input_65_interleave_0, values = (var_1915_cast_fp16, var_1917_cast_fp16, var_1919_cast_fp16, var_1921_cast_fp16, var_1923_cast_fp16, var_1925_cast_fp16, var_1927_cast_fp16, var_1929_cast_fp16, var_1931_cast_fp16, var_1933_cast_fp16, var_1935_cast_fp16, var_1937_cast_fp16, var_1939_cast_fp16, var_1941_cast_fp16, var_1943_cast_fp16, var_1945_cast_fp16, var_1947_cast_fp16, var_1949_cast_fp16, var_1951_cast_fp16, var_1953_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1962_pad_type_0 = const()[name = tensor<string, []>("op_1962_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1962_strides_0 = const()[name = tensor<string, []>("op_1962_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1962_pad_0 = const()[name = tensor<string, []>("op_1962_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1962_dilations_0 = const()[name = tensor<string, []>("op_1962_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1962_groups_0 = const()[name = tensor<string, []>("op_1962_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260251072)))];
+            tensor<fp16, [1280]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263527936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1962_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1962_dilations_0, groups = var_1962_groups_0, pad = var_1962_pad_0, pad_type = var_1962_pad_type_0, strides = var_1962_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1962_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1962_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263530560)))];
+            tensor<fp16, [1280]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263533184)))];
+            tensor<fp16, []> var_1972_to_fp16 = const()[name = tensor<string, []>("op_1972_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1972_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263535808)))];
+            tensor<fp16, [5120]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(276643072)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1998_pad_type_0 = const()[name = tensor<string, []>("op_1998_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1998_strides_0 = const()[name = tensor<string, []>("op_1998_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1998_pad_0 = const()[name = tensor<string, []>("op_1998_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1998_dilations_0 = const()[name = tensor<string, []>("op_1998_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1998_groups_0 = const()[name = tensor<string, []>("op_1998_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(276653376)))];
+            tensor<fp16, [1280]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289760640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1998_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1998_dilations_0, groups = var_1998_groups_0, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1998_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1998_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_2007 = const()[name = tensor<string, []>("op_2007"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289763264)))];
+            tensor<fp16, [1280]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289765888)))];
+            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_2023_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2058_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2058_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289768512)))];
+            tensor<fp16, [1280]> var_2058_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2058_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293045376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2058_cast_fp16 = conv(bias = var_2058_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_2058_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2058_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293048000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_2056_pad_type_0 = const()[name = tensor<string, []>("op_2056_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2056_strides_0 = const()[name = tensor<string, []>("op_2056_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2056_pad_0 = const()[name = tensor<string, []>("op_2056_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2056_dilations_0 = const()[name = tensor<string, []>("op_2056_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2056_groups_0 = const()[name = tensor<string, []>("op_2056_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(296324864)))];
+            tensor<fp16, [1280]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299601728)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2056_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<int32, [20]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2059_axis_0 = const()[name = tensor<string, []>("op_2059_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_19 = split(axis = var_2059_axis_0, split_sizes = tile_21, x = var_2058_cast_fp16)[name = tensor<string, []>("op_2059_cast_fp16")];
+            tensor<int32, [4]> var_2080_perm_0 = const()[name = tensor<string, []>("op_2080_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2081_axis_0 = const()[name = tensor<string, []>("op_2081_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2080_cast_fp16 = transpose(perm = var_2080_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_25")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_19 = split(axis = var_2081_axis_0, split_sizes = tile_22, x = var_2080_cast_fp16)[name = tensor<string, []>("op_2081_cast_fp16")];
+            tensor<int32, [20]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2102_axis_0 = const()[name = tensor<string, []>("op_2102_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_19 = split(axis = var_2102_axis_0, split_sizes = tile_23, x = var_2056_cast_fp16)[name = tensor<string, []>("op_2102_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2081_cast_fp16_0, var_2059_cast_fp16_0))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2081_cast_fp16_1, var_2059_cast_fp16_1))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2081_cast_fp16_2, var_2059_cast_fp16_2))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_287_equation_0 = const()[name = tensor<string, []>("aw_287_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_287_cast_fp16 = einsum(equation = aw_287_equation_0, values = (var_2081_cast_fp16_3, var_2059_cast_fp16_3))[name = tensor<string, []>("aw_287_cast_fp16")];
+            tensor<string, []> aw_289_equation_0 = const()[name = tensor<string, []>("aw_289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_289_cast_fp16 = einsum(equation = aw_289_equation_0, values = (var_2081_cast_fp16_4, var_2059_cast_fp16_4))[name = tensor<string, []>("aw_289_cast_fp16")];
+            tensor<string, []> aw_291_equation_0 = const()[name = tensor<string, []>("aw_291_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_291_cast_fp16 = einsum(equation = aw_291_equation_0, values = (var_2081_cast_fp16_5, var_2059_cast_fp16_5))[name = tensor<string, []>("aw_291_cast_fp16")];
+            tensor<string, []> aw_293_equation_0 = const()[name = tensor<string, []>("aw_293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_293_cast_fp16 = einsum(equation = aw_293_equation_0, values = (var_2081_cast_fp16_6, var_2059_cast_fp16_6))[name = tensor<string, []>("aw_293_cast_fp16")];
+            tensor<string, []> aw_295_equation_0 = const()[name = tensor<string, []>("aw_295_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_295_cast_fp16 = einsum(equation = aw_295_equation_0, values = (var_2081_cast_fp16_7, var_2059_cast_fp16_7))[name = tensor<string, []>("aw_295_cast_fp16")];
+            tensor<string, []> aw_297_equation_0 = const()[name = tensor<string, []>("aw_297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_297_cast_fp16 = einsum(equation = aw_297_equation_0, values = (var_2081_cast_fp16_8, var_2059_cast_fp16_8))[name = tensor<string, []>("aw_297_cast_fp16")];
+            tensor<string, []> aw_299_equation_0 = const()[name = tensor<string, []>("aw_299_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_299_cast_fp16 = einsum(equation = aw_299_equation_0, values = (var_2081_cast_fp16_9, var_2059_cast_fp16_9))[name = tensor<string, []>("aw_299_cast_fp16")];
+            tensor<string, []> aw_301_equation_0 = const()[name = tensor<string, []>("aw_301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_301_cast_fp16 = einsum(equation = aw_301_equation_0, values = (var_2081_cast_fp16_10, var_2059_cast_fp16_10))[name = tensor<string, []>("aw_301_cast_fp16")];
+            tensor<string, []> aw_303_equation_0 = const()[name = tensor<string, []>("aw_303_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_303_cast_fp16 = einsum(equation = aw_303_equation_0, values = (var_2081_cast_fp16_11, var_2059_cast_fp16_11))[name = tensor<string, []>("aw_303_cast_fp16")];
+            tensor<string, []> aw_305_equation_0 = const()[name = tensor<string, []>("aw_305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_305_cast_fp16 = einsum(equation = aw_305_equation_0, values = (var_2081_cast_fp16_12, var_2059_cast_fp16_12))[name = tensor<string, []>("aw_305_cast_fp16")];
+            tensor<string, []> aw_307_equation_0 = const()[name = tensor<string, []>("aw_307_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_307_cast_fp16 = einsum(equation = aw_307_equation_0, values = (var_2081_cast_fp16_13, var_2059_cast_fp16_13))[name = tensor<string, []>("aw_307_cast_fp16")];
+            tensor<string, []> aw_309_equation_0 = const()[name = tensor<string, []>("aw_309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_309_cast_fp16 = einsum(equation = aw_309_equation_0, values = (var_2081_cast_fp16_14, var_2059_cast_fp16_14))[name = tensor<string, []>("aw_309_cast_fp16")];
+            tensor<string, []> aw_311_equation_0 = const()[name = tensor<string, []>("aw_311_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_311_cast_fp16 = einsum(equation = aw_311_equation_0, values = (var_2081_cast_fp16_15, var_2059_cast_fp16_15))[name = tensor<string, []>("aw_311_cast_fp16")];
+            tensor<string, []> aw_313_equation_0 = const()[name = tensor<string, []>("aw_313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_313_cast_fp16 = einsum(equation = aw_313_equation_0, values = (var_2081_cast_fp16_16, var_2059_cast_fp16_16))[name = tensor<string, []>("aw_313_cast_fp16")];
+            tensor<string, []> aw_315_equation_0 = const()[name = tensor<string, []>("aw_315_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_315_cast_fp16 = einsum(equation = aw_315_equation_0, values = (var_2081_cast_fp16_17, var_2059_cast_fp16_17))[name = tensor<string, []>("aw_315_cast_fp16")];
+            tensor<string, []> aw_317_equation_0 = const()[name = tensor<string, []>("aw_317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_317_cast_fp16 = einsum(equation = aw_317_equation_0, values = (var_2081_cast_fp16_18, var_2059_cast_fp16_18))[name = tensor<string, []>("aw_317_cast_fp16")];
+            tensor<string, []> aw_319_equation_0 = const()[name = tensor<string, []>("aw_319_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_319_cast_fp16 = einsum(equation = aw_319_equation_0, values = (var_2081_cast_fp16_19, var_2059_cast_fp16_19))[name = tensor<string, []>("aw_319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2163_cast_fp16 = softmax(axis = var_2007, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2163_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2164_cast_fp16 = softmax(axis = var_2007, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2165_cast_fp16 = softmax(axis = var_2007, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2165_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2166_cast_fp16 = softmax(axis = var_2007, x = aw_287_cast_fp16)[name = tensor<string, []>("op_2166_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2167_cast_fp16 = softmax(axis = var_2007, x = aw_289_cast_fp16)[name = tensor<string, []>("op_2167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2168_cast_fp16 = softmax(axis = var_2007, x = aw_291_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2169_cast_fp16 = softmax(axis = var_2007, x = aw_293_cast_fp16)[name = tensor<string, []>("op_2169_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2170_cast_fp16 = softmax(axis = var_2007, x = aw_295_cast_fp16)[name = tensor<string, []>("op_2170_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2171_cast_fp16 = softmax(axis = var_2007, x = aw_297_cast_fp16)[name = tensor<string, []>("op_2171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2172_cast_fp16 = softmax(axis = var_2007, x = aw_299_cast_fp16)[name = tensor<string, []>("op_2172_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2173_cast_fp16 = softmax(axis = var_2007, x = aw_301_cast_fp16)[name = tensor<string, []>("op_2173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2174_cast_fp16 = softmax(axis = var_2007, x = aw_303_cast_fp16)[name = tensor<string, []>("op_2174_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2175_cast_fp16 = softmax(axis = var_2007, x = aw_305_cast_fp16)[name = tensor<string, []>("op_2175_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2176_cast_fp16 = softmax(axis = var_2007, x = aw_307_cast_fp16)[name = tensor<string, []>("op_2176_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2177_cast_fp16 = softmax(axis = var_2007, x = aw_309_cast_fp16)[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2178_cast_fp16 = softmax(axis = var_2007, x = aw_311_cast_fp16)[name = tensor<string, []>("op_2178_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2179_cast_fp16 = softmax(axis = var_2007, x = aw_313_cast_fp16)[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2180_cast_fp16 = softmax(axis = var_2007, x = aw_315_cast_fp16)[name = tensor<string, []>("op_2180_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2181_cast_fp16 = softmax(axis = var_2007, x = aw_317_cast_fp16)[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2182_cast_fp16 = softmax(axis = var_2007, x = aw_319_cast_fp16)[name = tensor<string, []>("op_2182_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2184_cast_fp16 = einsum(equation = var_2184_equation_0, values = (var_2102_cast_fp16_0, var_2163_cast_fp16))[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<string, []> var_2186_equation_0 = const()[name = tensor<string, []>("op_2186_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16 = einsum(equation = var_2186_equation_0, values = (var_2102_cast_fp16_1, var_2164_cast_fp16))[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<string, []> var_2188_equation_0 = const()[name = tensor<string, []>("op_2188_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2188_cast_fp16 = einsum(equation = var_2188_equation_0, values = (var_2102_cast_fp16_2, var_2165_cast_fp16))[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<string, []> var_2190_equation_0 = const()[name = tensor<string, []>("op_2190_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2190_cast_fp16 = einsum(equation = var_2190_equation_0, values = (var_2102_cast_fp16_3, var_2166_cast_fp16))[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<string, []> var_2192_equation_0 = const()[name = tensor<string, []>("op_2192_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2192_cast_fp16 = einsum(equation = var_2192_equation_0, values = (var_2102_cast_fp16_4, var_2167_cast_fp16))[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<string, []> var_2194_equation_0 = const()[name = tensor<string, []>("op_2194_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2194_cast_fp16 = einsum(equation = var_2194_equation_0, values = (var_2102_cast_fp16_5, var_2168_cast_fp16))[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<string, []> var_2196_equation_0 = const()[name = tensor<string, []>("op_2196_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2196_cast_fp16 = einsum(equation = var_2196_equation_0, values = (var_2102_cast_fp16_6, var_2169_cast_fp16))[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<string, []> var_2198_equation_0 = const()[name = tensor<string, []>("op_2198_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2198_cast_fp16 = einsum(equation = var_2198_equation_0, values = (var_2102_cast_fp16_7, var_2170_cast_fp16))[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<string, []> var_2200_equation_0 = const()[name = tensor<string, []>("op_2200_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2200_cast_fp16 = einsum(equation = var_2200_equation_0, values = (var_2102_cast_fp16_8, var_2171_cast_fp16))[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<string, []> var_2202_equation_0 = const()[name = tensor<string, []>("op_2202_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2202_cast_fp16 = einsum(equation = var_2202_equation_0, values = (var_2102_cast_fp16_9, var_2172_cast_fp16))[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<string, []> var_2204_equation_0 = const()[name = tensor<string, []>("op_2204_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2204_cast_fp16 = einsum(equation = var_2204_equation_0, values = (var_2102_cast_fp16_10, var_2173_cast_fp16))[name = tensor<string, []>("op_2204_cast_fp16")];
+            tensor<string, []> var_2206_equation_0 = const()[name = tensor<string, []>("op_2206_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2206_cast_fp16 = einsum(equation = var_2206_equation_0, values = (var_2102_cast_fp16_11, var_2174_cast_fp16))[name = tensor<string, []>("op_2206_cast_fp16")];
+            tensor<string, []> var_2208_equation_0 = const()[name = tensor<string, []>("op_2208_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2208_cast_fp16 = einsum(equation = var_2208_equation_0, values = (var_2102_cast_fp16_12, var_2175_cast_fp16))[name = tensor<string, []>("op_2208_cast_fp16")];
+            tensor<string, []> var_2210_equation_0 = const()[name = tensor<string, []>("op_2210_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2210_cast_fp16 = einsum(equation = var_2210_equation_0, values = (var_2102_cast_fp16_13, var_2176_cast_fp16))[name = tensor<string, []>("op_2210_cast_fp16")];
+            tensor<string, []> var_2212_equation_0 = const()[name = tensor<string, []>("op_2212_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2102_cast_fp16_14, var_2177_cast_fp16))[name = tensor<string, []>("op_2212_cast_fp16")];
+            tensor<string, []> var_2214_equation_0 = const()[name = tensor<string, []>("op_2214_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2102_cast_fp16_15, var_2178_cast_fp16))[name = tensor<string, []>("op_2214_cast_fp16")];
+            tensor<string, []> var_2216_equation_0 = const()[name = tensor<string, []>("op_2216_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2102_cast_fp16_16, var_2179_cast_fp16))[name = tensor<string, []>("op_2216_cast_fp16")];
+            tensor<string, []> var_2218_equation_0 = const()[name = tensor<string, []>("op_2218_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2102_cast_fp16_17, var_2180_cast_fp16))[name = tensor<string, []>("op_2218_cast_fp16")];
+            tensor<string, []> var_2220_equation_0 = const()[name = tensor<string, []>("op_2220_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2102_cast_fp16_18, var_2181_cast_fp16))[name = tensor<string, []>("op_2220_cast_fp16")];
+            tensor<string, []> var_2222_equation_0 = const()[name = tensor<string, []>("op_2222_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2102_cast_fp16_19, var_2182_cast_fp16))[name = tensor<string, []>("op_2222_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_75_cast_fp16 = concat(axis = var_2007, interleave = input_75_interleave_0, values = (var_2184_cast_fp16, var_2186_cast_fp16, var_2188_cast_fp16, var_2190_cast_fp16, var_2192_cast_fp16, var_2194_cast_fp16, var_2196_cast_fp16, var_2198_cast_fp16, var_2200_cast_fp16, var_2202_cast_fp16, var_2204_cast_fp16, var_2206_cast_fp16, var_2208_cast_fp16, var_2210_cast_fp16, var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_2231_pad_type_0 = const()[name = tensor<string, []>("op_2231_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2231_strides_0 = const()[name = tensor<string, []>("op_2231_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2231_pad_0 = const()[name = tensor<string, []>("op_2231_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2231_dilations_0 = const()[name = tensor<string, []>("op_2231_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2231_groups_0 = const()[name = tensor<string, []>("op_2231_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299604352)))];
+            tensor<fp16, [1280]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302881216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2231_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_2231_dilations_0, groups = var_2231_groups_0, pad = var_2231_pad_0, pad_type = var_2231_pad_type_0, strides = var_2231_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_2231_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_2231_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302883840)))];
+            tensor<fp16, [1280]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302886464)))];
+            tensor<fp16, []> var_2241_to_fp16 = const()[name = tensor<string, []>("op_2241_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_2241_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(302889088)))];
+            tensor<fp16, [5120]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(315996352)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_2267_pad_type_0 = const()[name = tensor<string, []>("op_2267_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2267_strides_0 = const()[name = tensor<string, []>("op_2267_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2267_pad_0 = const()[name = tensor<string, []>("op_2267_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2267_dilations_0 = const()[name = tensor<string, []>("op_2267_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2267_groups_0 = const()[name = tensor<string, []>("op_2267_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316006656)))];
+            tensor<fp16, [1280]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329113920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2267_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_2267_dilations_0, groups = var_2267_groups_0, pad = var_2267_pad_0, pad_type = var_2267_pad_type_0, strides = var_2267_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_2267_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_2267_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_2276 = const()[name = tensor<string, []>("op_2276"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329116544)))];
+            tensor<fp16, [1280]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329119168)))];
+            tensor<fp16, []> var_2292_to_fp16 = const()[name = tensor<string, []>("op_2292_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_2292_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2327_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2327_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329121792)))];
+            tensor<fp16, [1280]> var_2327_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2327_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332398656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2327_cast_fp16 = conv(bias = var_2327_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_2327_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2327_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332401280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_2325_pad_type_0 = const()[name = tensor<string, []>("op_2325_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2325_strides_0 = const()[name = tensor<string, []>("op_2325_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2325_pad_0 = const()[name = tensor<string, []>("op_2325_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2325_dilations_0 = const()[name = tensor<string, []>("op_2325_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2325_groups_0 = const()[name = tensor<string, []>("op_2325_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(335678144)))];
+            tensor<fp16, [1280]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(338955008)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2325_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_2325_dilations_0, groups = var_2325_groups_0, pad = var_2325_pad_0, pad_type = var_2325_pad_type_0, strides = var_2325_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2325_cast_fp16")];
+            tensor<int32, [20]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2328_axis_0 = const()[name = tensor<string, []>("op_2328_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_19 = split(axis = var_2328_axis_0, split_sizes = tile_24, x = var_2327_cast_fp16)[name = tensor<string, []>("op_2328_cast_fp16")];
+            tensor<int32, [4]> var_2349_perm_0 = const()[name = tensor<string, []>("op_2349_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2350_axis_0 = const()[name = tensor<string, []>("op_2350_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2349_cast_fp16 = transpose(perm = var_2349_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_24")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_19 = split(axis = var_2350_axis_0, split_sizes = tile_25, x = var_2349_cast_fp16)[name = tensor<string, []>("op_2350_cast_fp16")];
+            tensor<int32, [20]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2371_axis_0 = const()[name = tensor<string, []>("op_2371_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_19 = split(axis = var_2371_axis_0, split_sizes = tile_26, x = var_2325_cast_fp16)[name = tensor<string, []>("op_2371_cast_fp16")];
+            tensor<string, []> aw_321_equation_0 = const()[name = tensor<string, []>("aw_321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_321_cast_fp16 = einsum(equation = aw_321_equation_0, values = (var_2350_cast_fp16_0, var_2328_cast_fp16_0))[name = tensor<string, []>("aw_321_cast_fp16")];
+            tensor<string, []> aw_323_equation_0 = const()[name = tensor<string, []>("aw_323_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_323_cast_fp16 = einsum(equation = aw_323_equation_0, values = (var_2350_cast_fp16_1, var_2328_cast_fp16_1))[name = tensor<string, []>("aw_323_cast_fp16")];
+            tensor<string, []> aw_325_equation_0 = const()[name = tensor<string, []>("aw_325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_325_cast_fp16 = einsum(equation = aw_325_equation_0, values = (var_2350_cast_fp16_2, var_2328_cast_fp16_2))[name = tensor<string, []>("aw_325_cast_fp16")];
+            tensor<string, []> aw_327_equation_0 = const()[name = tensor<string, []>("aw_327_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_327_cast_fp16 = einsum(equation = aw_327_equation_0, values = (var_2350_cast_fp16_3, var_2328_cast_fp16_3))[name = tensor<string, []>("aw_327_cast_fp16")];
+            tensor<string, []> aw_329_equation_0 = const()[name = tensor<string, []>("aw_329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_329_cast_fp16 = einsum(equation = aw_329_equation_0, values = (var_2350_cast_fp16_4, var_2328_cast_fp16_4))[name = tensor<string, []>("aw_329_cast_fp16")];
+            tensor<string, []> aw_331_equation_0 = const()[name = tensor<string, []>("aw_331_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_331_cast_fp16 = einsum(equation = aw_331_equation_0, values = (var_2350_cast_fp16_5, var_2328_cast_fp16_5))[name = tensor<string, []>("aw_331_cast_fp16")];
+            tensor<string, []> aw_333_equation_0 = const()[name = tensor<string, []>("aw_333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_333_cast_fp16 = einsum(equation = aw_333_equation_0, values = (var_2350_cast_fp16_6, var_2328_cast_fp16_6))[name = tensor<string, []>("aw_333_cast_fp16")];
+            tensor<string, []> aw_335_equation_0 = const()[name = tensor<string, []>("aw_335_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_335_cast_fp16 = einsum(equation = aw_335_equation_0, values = (var_2350_cast_fp16_7, var_2328_cast_fp16_7))[name = tensor<string, []>("aw_335_cast_fp16")];
+            tensor<string, []> aw_337_equation_0 = const()[name = tensor<string, []>("aw_337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_337_cast_fp16 = einsum(equation = aw_337_equation_0, values = (var_2350_cast_fp16_8, var_2328_cast_fp16_8))[name = tensor<string, []>("aw_337_cast_fp16")];
+            tensor<string, []> aw_339_equation_0 = const()[name = tensor<string, []>("aw_339_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_339_cast_fp16 = einsum(equation = aw_339_equation_0, values = (var_2350_cast_fp16_9, var_2328_cast_fp16_9))[name = tensor<string, []>("aw_339_cast_fp16")];
+            tensor<string, []> aw_341_equation_0 = const()[name = tensor<string, []>("aw_341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_341_cast_fp16 = einsum(equation = aw_341_equation_0, values = (var_2350_cast_fp16_10, var_2328_cast_fp16_10))[name = tensor<string, []>("aw_341_cast_fp16")];
+            tensor<string, []> aw_343_equation_0 = const()[name = tensor<string, []>("aw_343_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_343_cast_fp16 = einsum(equation = aw_343_equation_0, values = (var_2350_cast_fp16_11, var_2328_cast_fp16_11))[name = tensor<string, []>("aw_343_cast_fp16")];
+            tensor<string, []> aw_345_equation_0 = const()[name = tensor<string, []>("aw_345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_345_cast_fp16 = einsum(equation = aw_345_equation_0, values = (var_2350_cast_fp16_12, var_2328_cast_fp16_12))[name = tensor<string, []>("aw_345_cast_fp16")];
+            tensor<string, []> aw_347_equation_0 = const()[name = tensor<string, []>("aw_347_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_347_cast_fp16 = einsum(equation = aw_347_equation_0, values = (var_2350_cast_fp16_13, var_2328_cast_fp16_13))[name = tensor<string, []>("aw_347_cast_fp16")];
+            tensor<string, []> aw_349_equation_0 = const()[name = tensor<string, []>("aw_349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_349_cast_fp16 = einsum(equation = aw_349_equation_0, values = (var_2350_cast_fp16_14, var_2328_cast_fp16_14))[name = tensor<string, []>("aw_349_cast_fp16")];
+            tensor<string, []> aw_351_equation_0 = const()[name = tensor<string, []>("aw_351_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_351_cast_fp16 = einsum(equation = aw_351_equation_0, values = (var_2350_cast_fp16_15, var_2328_cast_fp16_15))[name = tensor<string, []>("aw_351_cast_fp16")];
+            tensor<string, []> aw_353_equation_0 = const()[name = tensor<string, []>("aw_353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_353_cast_fp16 = einsum(equation = aw_353_equation_0, values = (var_2350_cast_fp16_16, var_2328_cast_fp16_16))[name = tensor<string, []>("aw_353_cast_fp16")];
+            tensor<string, []> aw_355_equation_0 = const()[name = tensor<string, []>("aw_355_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_355_cast_fp16 = einsum(equation = aw_355_equation_0, values = (var_2350_cast_fp16_17, var_2328_cast_fp16_17))[name = tensor<string, []>("aw_355_cast_fp16")];
+            tensor<string, []> aw_357_equation_0 = const()[name = tensor<string, []>("aw_357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_357_cast_fp16 = einsum(equation = aw_357_equation_0, values = (var_2350_cast_fp16_18, var_2328_cast_fp16_18))[name = tensor<string, []>("aw_357_cast_fp16")];
+            tensor<string, []> aw_359_equation_0 = const()[name = tensor<string, []>("aw_359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_359_cast_fp16 = einsum(equation = aw_359_equation_0, values = (var_2350_cast_fp16_19, var_2328_cast_fp16_19))[name = tensor<string, []>("aw_359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2432_cast_fp16 = softmax(axis = var_2276, x = aw_321_cast_fp16)[name = tensor<string, []>("op_2432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2433_cast_fp16 = softmax(axis = var_2276, x = aw_323_cast_fp16)[name = tensor<string, []>("op_2433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2434_cast_fp16 = softmax(axis = var_2276, x = aw_325_cast_fp16)[name = tensor<string, []>("op_2434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2435_cast_fp16 = softmax(axis = var_2276, x = aw_327_cast_fp16)[name = tensor<string, []>("op_2435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2436_cast_fp16 = softmax(axis = var_2276, x = aw_329_cast_fp16)[name = tensor<string, []>("op_2436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2437_cast_fp16 = softmax(axis = var_2276, x = aw_331_cast_fp16)[name = tensor<string, []>("op_2437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2438_cast_fp16 = softmax(axis = var_2276, x = aw_333_cast_fp16)[name = tensor<string, []>("op_2438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2439_cast_fp16 = softmax(axis = var_2276, x = aw_335_cast_fp16)[name = tensor<string, []>("op_2439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2440_cast_fp16 = softmax(axis = var_2276, x = aw_337_cast_fp16)[name = tensor<string, []>("op_2440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2441_cast_fp16 = softmax(axis = var_2276, x = aw_339_cast_fp16)[name = tensor<string, []>("op_2441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2442_cast_fp16 = softmax(axis = var_2276, x = aw_341_cast_fp16)[name = tensor<string, []>("op_2442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2443_cast_fp16 = softmax(axis = var_2276, x = aw_343_cast_fp16)[name = tensor<string, []>("op_2443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2444_cast_fp16 = softmax(axis = var_2276, x = aw_345_cast_fp16)[name = tensor<string, []>("op_2444_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2445_cast_fp16 = softmax(axis = var_2276, x = aw_347_cast_fp16)[name = tensor<string, []>("op_2445_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2446_cast_fp16 = softmax(axis = var_2276, x = aw_349_cast_fp16)[name = tensor<string, []>("op_2446_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2447_cast_fp16 = softmax(axis = var_2276, x = aw_351_cast_fp16)[name = tensor<string, []>("op_2447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2448_cast_fp16 = softmax(axis = var_2276, x = aw_353_cast_fp16)[name = tensor<string, []>("op_2448_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2449_cast_fp16 = softmax(axis = var_2276, x = aw_355_cast_fp16)[name = tensor<string, []>("op_2449_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2450_cast_fp16 = softmax(axis = var_2276, x = aw_357_cast_fp16)[name = tensor<string, []>("op_2450_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2451_cast_fp16 = softmax(axis = var_2276, x = aw_359_cast_fp16)[name = tensor<string, []>("op_2451_cast_fp16")];
+            tensor<string, []> var_2453_equation_0 = const()[name = tensor<string, []>("op_2453_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2453_cast_fp16 = einsum(equation = var_2453_equation_0, values = (var_2371_cast_fp16_0, var_2432_cast_fp16))[name = tensor<string, []>("op_2453_cast_fp16")];
+            tensor<string, []> var_2455_equation_0 = const()[name = tensor<string, []>("op_2455_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2455_cast_fp16 = einsum(equation = var_2455_equation_0, values = (var_2371_cast_fp16_1, var_2433_cast_fp16))[name = tensor<string, []>("op_2455_cast_fp16")];
+            tensor<string, []> var_2457_equation_0 = const()[name = tensor<string, []>("op_2457_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2457_cast_fp16 = einsum(equation = var_2457_equation_0, values = (var_2371_cast_fp16_2, var_2434_cast_fp16))[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<string, []> var_2459_equation_0 = const()[name = tensor<string, []>("op_2459_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2459_cast_fp16 = einsum(equation = var_2459_equation_0, values = (var_2371_cast_fp16_3, var_2435_cast_fp16))[name = tensor<string, []>("op_2459_cast_fp16")];
+            tensor<string, []> var_2461_equation_0 = const()[name = tensor<string, []>("op_2461_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2461_cast_fp16 = einsum(equation = var_2461_equation_0, values = (var_2371_cast_fp16_4, var_2436_cast_fp16))[name = tensor<string, []>("op_2461_cast_fp16")];
+            tensor<string, []> var_2463_equation_0 = const()[name = tensor<string, []>("op_2463_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2463_cast_fp16 = einsum(equation = var_2463_equation_0, values = (var_2371_cast_fp16_5, var_2437_cast_fp16))[name = tensor<string, []>("op_2463_cast_fp16")];
+            tensor<string, []> var_2465_equation_0 = const()[name = tensor<string, []>("op_2465_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2465_cast_fp16 = einsum(equation = var_2465_equation_0, values = (var_2371_cast_fp16_6, var_2438_cast_fp16))[name = tensor<string, []>("op_2465_cast_fp16")];
+            tensor<string, []> var_2467_equation_0 = const()[name = tensor<string, []>("op_2467_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2467_cast_fp16 = einsum(equation = var_2467_equation_0, values = (var_2371_cast_fp16_7, var_2439_cast_fp16))[name = tensor<string, []>("op_2467_cast_fp16")];
+            tensor<string, []> var_2469_equation_0 = const()[name = tensor<string, []>("op_2469_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2469_cast_fp16 = einsum(equation = var_2469_equation_0, values = (var_2371_cast_fp16_8, var_2440_cast_fp16))[name = tensor<string, []>("op_2469_cast_fp16")];
+            tensor<string, []> var_2471_equation_0 = const()[name = tensor<string, []>("op_2471_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2471_cast_fp16 = einsum(equation = var_2471_equation_0, values = (var_2371_cast_fp16_9, var_2441_cast_fp16))[name = tensor<string, []>("op_2471_cast_fp16")];
+            tensor<string, []> var_2473_equation_0 = const()[name = tensor<string, []>("op_2473_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2473_cast_fp16 = einsum(equation = var_2473_equation_0, values = (var_2371_cast_fp16_10, var_2442_cast_fp16))[name = tensor<string, []>("op_2473_cast_fp16")];
+            tensor<string, []> var_2475_equation_0 = const()[name = tensor<string, []>("op_2475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2475_cast_fp16 = einsum(equation = var_2475_equation_0, values = (var_2371_cast_fp16_11, var_2443_cast_fp16))[name = tensor<string, []>("op_2475_cast_fp16")];
+            tensor<string, []> var_2477_equation_0 = const()[name = tensor<string, []>("op_2477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2477_cast_fp16 = einsum(equation = var_2477_equation_0, values = (var_2371_cast_fp16_12, var_2444_cast_fp16))[name = tensor<string, []>("op_2477_cast_fp16")];
+            tensor<string, []> var_2479_equation_0 = const()[name = tensor<string, []>("op_2479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2479_cast_fp16 = einsum(equation = var_2479_equation_0, values = (var_2371_cast_fp16_13, var_2445_cast_fp16))[name = tensor<string, []>("op_2479_cast_fp16")];
+            tensor<string, []> var_2481_equation_0 = const()[name = tensor<string, []>("op_2481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2481_cast_fp16 = einsum(equation = var_2481_equation_0, values = (var_2371_cast_fp16_14, var_2446_cast_fp16))[name = tensor<string, []>("op_2481_cast_fp16")];
+            tensor<string, []> var_2483_equation_0 = const()[name = tensor<string, []>("op_2483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2483_cast_fp16 = einsum(equation = var_2483_equation_0, values = (var_2371_cast_fp16_15, var_2447_cast_fp16))[name = tensor<string, []>("op_2483_cast_fp16")];
+            tensor<string, []> var_2485_equation_0 = const()[name = tensor<string, []>("op_2485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2485_cast_fp16 = einsum(equation = var_2485_equation_0, values = (var_2371_cast_fp16_16, var_2448_cast_fp16))[name = tensor<string, []>("op_2485_cast_fp16")];
+            tensor<string, []> var_2487_equation_0 = const()[name = tensor<string, []>("op_2487_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2487_cast_fp16 = einsum(equation = var_2487_equation_0, values = (var_2371_cast_fp16_17, var_2449_cast_fp16))[name = tensor<string, []>("op_2487_cast_fp16")];
+            tensor<string, []> var_2489_equation_0 = const()[name = tensor<string, []>("op_2489_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2489_cast_fp16 = einsum(equation = var_2489_equation_0, values = (var_2371_cast_fp16_18, var_2450_cast_fp16))[name = tensor<string, []>("op_2489_cast_fp16")];
+            tensor<string, []> var_2491_equation_0 = const()[name = tensor<string, []>("op_2491_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2491_cast_fp16 = einsum(equation = var_2491_equation_0, values = (var_2371_cast_fp16_19, var_2451_cast_fp16))[name = tensor<string, []>("op_2491_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_85_cast_fp16 = concat(axis = var_2276, interleave = input_85_interleave_0, values = (var_2453_cast_fp16, var_2455_cast_fp16, var_2457_cast_fp16, var_2459_cast_fp16, var_2461_cast_fp16, var_2463_cast_fp16, var_2465_cast_fp16, var_2467_cast_fp16, var_2469_cast_fp16, var_2471_cast_fp16, var_2473_cast_fp16, var_2475_cast_fp16, var_2477_cast_fp16, var_2479_cast_fp16, var_2481_cast_fp16, var_2483_cast_fp16, var_2485_cast_fp16, var_2487_cast_fp16, var_2489_cast_fp16, var_2491_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_2500_pad_type_0 = const()[name = tensor<string, []>("op_2500_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2500_strides_0 = const()[name = tensor<string, []>("op_2500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2500_pad_0 = const()[name = tensor<string, []>("op_2500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2500_dilations_0 = const()[name = tensor<string, []>("op_2500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2500_groups_0 = const()[name = tensor<string, []>("op_2500_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(338957632)))];
+            tensor<fp16, [1280]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342234496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2500_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_2500_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_2500_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342237120)))];
+            tensor<fp16, [1280]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342239744)))];
+            tensor<fp16, []> var_2510_to_fp16 = const()[name = tensor<string, []>("op_2510_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_2510_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342242368)))];
+            tensor<fp16, [5120]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355349632)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_2536_pad_type_0 = const()[name = tensor<string, []>("op_2536_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2536_strides_0 = const()[name = tensor<string, []>("op_2536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2536_pad_0 = const()[name = tensor<string, []>("op_2536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2536_dilations_0 = const()[name = tensor<string, []>("op_2536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2536_groups_0 = const()[name = tensor<string, []>("op_2536_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355359936)))];
+            tensor<fp16, [1280]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368467200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2536_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_2536_dilations_0, groups = var_2536_groups_0, pad = var_2536_pad_0, pad_type = var_2536_pad_type_0, strides = var_2536_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_2536_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_2536_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_2545 = const()[name = tensor<string, []>("op_2545"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368469824)))];
+            tensor<fp16, [1280]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368472448)))];
+            tensor<fp16, []> var_2561_to_fp16 = const()[name = tensor<string, []>("op_2561_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_2561_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2596_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2596_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368475072)))];
+            tensor<fp16, [1280]> var_2596_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2596_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(371751936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2596_cast_fp16 = conv(bias = var_2596_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_2596_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2596_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(371754560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_2594_pad_type_0 = const()[name = tensor<string, []>("op_2594_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2594_strides_0 = const()[name = tensor<string, []>("op_2594_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2594_pad_0 = const()[name = tensor<string, []>("op_2594_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2594_dilations_0 = const()[name = tensor<string, []>("op_2594_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2594_groups_0 = const()[name = tensor<string, []>("op_2594_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(375031424)))];
+            tensor<fp16, [1280]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378308288)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2594_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_2594_dilations_0, groups = var_2594_groups_0, pad = var_2594_pad_0, pad_type = var_2594_pad_type_0, strides = var_2594_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2594_cast_fp16")];
+            tensor<int32, [20]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2597_axis_0 = const()[name = tensor<string, []>("op_2597_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_19 = split(axis = var_2597_axis_0, split_sizes = tile_27, x = var_2596_cast_fp16)[name = tensor<string, []>("op_2597_cast_fp16")];
+            tensor<int32, [4]> var_2618_perm_0 = const()[name = tensor<string, []>("op_2618_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2619_axis_0 = const()[name = tensor<string, []>("op_2619_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2618_cast_fp16 = transpose(perm = var_2618_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_23")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_19 = split(axis = var_2619_axis_0, split_sizes = tile_28, x = var_2618_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<int32, [20]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2640_axis_0 = const()[name = tensor<string, []>("op_2640_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_19 = split(axis = var_2640_axis_0, split_sizes = tile_29, x = var_2594_cast_fp16)[name = tensor<string, []>("op_2640_cast_fp16")];
+            tensor<string, []> aw_361_equation_0 = const()[name = tensor<string, []>("aw_361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_361_cast_fp16 = einsum(equation = aw_361_equation_0, values = (var_2619_cast_fp16_0, var_2597_cast_fp16_0))[name = tensor<string, []>("aw_361_cast_fp16")];
+            tensor<string, []> aw_363_equation_0 = const()[name = tensor<string, []>("aw_363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_363_cast_fp16 = einsum(equation = aw_363_equation_0, values = (var_2619_cast_fp16_1, var_2597_cast_fp16_1))[name = tensor<string, []>("aw_363_cast_fp16")];
+            tensor<string, []> aw_365_equation_0 = const()[name = tensor<string, []>("aw_365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_365_cast_fp16 = einsum(equation = aw_365_equation_0, values = (var_2619_cast_fp16_2, var_2597_cast_fp16_2))[name = tensor<string, []>("aw_365_cast_fp16")];
+            tensor<string, []> aw_367_equation_0 = const()[name = tensor<string, []>("aw_367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_367_cast_fp16 = einsum(equation = aw_367_equation_0, values = (var_2619_cast_fp16_3, var_2597_cast_fp16_3))[name = tensor<string, []>("aw_367_cast_fp16")];
+            tensor<string, []> aw_369_equation_0 = const()[name = tensor<string, []>("aw_369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_369_cast_fp16 = einsum(equation = aw_369_equation_0, values = (var_2619_cast_fp16_4, var_2597_cast_fp16_4))[name = tensor<string, []>("aw_369_cast_fp16")];
+            tensor<string, []> aw_371_equation_0 = const()[name = tensor<string, []>("aw_371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_371_cast_fp16 = einsum(equation = aw_371_equation_0, values = (var_2619_cast_fp16_5, var_2597_cast_fp16_5))[name = tensor<string, []>("aw_371_cast_fp16")];
+            tensor<string, []> aw_373_equation_0 = const()[name = tensor<string, []>("aw_373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_373_cast_fp16 = einsum(equation = aw_373_equation_0, values = (var_2619_cast_fp16_6, var_2597_cast_fp16_6))[name = tensor<string, []>("aw_373_cast_fp16")];
+            tensor<string, []> aw_375_equation_0 = const()[name = tensor<string, []>("aw_375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_375_cast_fp16 = einsum(equation = aw_375_equation_0, values = (var_2619_cast_fp16_7, var_2597_cast_fp16_7))[name = tensor<string, []>("aw_375_cast_fp16")];
+            tensor<string, []> aw_377_equation_0 = const()[name = tensor<string, []>("aw_377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_377_cast_fp16 = einsum(equation = aw_377_equation_0, values = (var_2619_cast_fp16_8, var_2597_cast_fp16_8))[name = tensor<string, []>("aw_377_cast_fp16")];
+            tensor<string, []> aw_379_equation_0 = const()[name = tensor<string, []>("aw_379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_379_cast_fp16 = einsum(equation = aw_379_equation_0, values = (var_2619_cast_fp16_9, var_2597_cast_fp16_9))[name = tensor<string, []>("aw_379_cast_fp16")];
+            tensor<string, []> aw_381_equation_0 = const()[name = tensor<string, []>("aw_381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_381_cast_fp16 = einsum(equation = aw_381_equation_0, values = (var_2619_cast_fp16_10, var_2597_cast_fp16_10))[name = tensor<string, []>("aw_381_cast_fp16")];
+            tensor<string, []> aw_383_equation_0 = const()[name = tensor<string, []>("aw_383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_383_cast_fp16 = einsum(equation = aw_383_equation_0, values = (var_2619_cast_fp16_11, var_2597_cast_fp16_11))[name = tensor<string, []>("aw_383_cast_fp16")];
+            tensor<string, []> aw_385_equation_0 = const()[name = tensor<string, []>("aw_385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_385_cast_fp16 = einsum(equation = aw_385_equation_0, values = (var_2619_cast_fp16_12, var_2597_cast_fp16_12))[name = tensor<string, []>("aw_385_cast_fp16")];
+            tensor<string, []> aw_387_equation_0 = const()[name = tensor<string, []>("aw_387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_387_cast_fp16 = einsum(equation = aw_387_equation_0, values = (var_2619_cast_fp16_13, var_2597_cast_fp16_13))[name = tensor<string, []>("aw_387_cast_fp16")];
+            tensor<string, []> aw_389_equation_0 = const()[name = tensor<string, []>("aw_389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_389_cast_fp16 = einsum(equation = aw_389_equation_0, values = (var_2619_cast_fp16_14, var_2597_cast_fp16_14))[name = tensor<string, []>("aw_389_cast_fp16")];
+            tensor<string, []> aw_391_equation_0 = const()[name = tensor<string, []>("aw_391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_391_cast_fp16 = einsum(equation = aw_391_equation_0, values = (var_2619_cast_fp16_15, var_2597_cast_fp16_15))[name = tensor<string, []>("aw_391_cast_fp16")];
+            tensor<string, []> aw_393_equation_0 = const()[name = tensor<string, []>("aw_393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_393_cast_fp16 = einsum(equation = aw_393_equation_0, values = (var_2619_cast_fp16_16, var_2597_cast_fp16_16))[name = tensor<string, []>("aw_393_cast_fp16")];
+            tensor<string, []> aw_395_equation_0 = const()[name = tensor<string, []>("aw_395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_395_cast_fp16 = einsum(equation = aw_395_equation_0, values = (var_2619_cast_fp16_17, var_2597_cast_fp16_17))[name = tensor<string, []>("aw_395_cast_fp16")];
+            tensor<string, []> aw_397_equation_0 = const()[name = tensor<string, []>("aw_397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_397_cast_fp16 = einsum(equation = aw_397_equation_0, values = (var_2619_cast_fp16_18, var_2597_cast_fp16_18))[name = tensor<string, []>("aw_397_cast_fp16")];
+            tensor<string, []> aw_399_equation_0 = const()[name = tensor<string, []>("aw_399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_399_cast_fp16 = einsum(equation = aw_399_equation_0, values = (var_2619_cast_fp16_19, var_2597_cast_fp16_19))[name = tensor<string, []>("aw_399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2701_cast_fp16 = softmax(axis = var_2545, x = aw_361_cast_fp16)[name = tensor<string, []>("op_2701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2702_cast_fp16 = softmax(axis = var_2545, x = aw_363_cast_fp16)[name = tensor<string, []>("op_2702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2703_cast_fp16 = softmax(axis = var_2545, x = aw_365_cast_fp16)[name = tensor<string, []>("op_2703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2704_cast_fp16 = softmax(axis = var_2545, x = aw_367_cast_fp16)[name = tensor<string, []>("op_2704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2705_cast_fp16 = softmax(axis = var_2545, x = aw_369_cast_fp16)[name = tensor<string, []>("op_2705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2706_cast_fp16 = softmax(axis = var_2545, x = aw_371_cast_fp16)[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2707_cast_fp16 = softmax(axis = var_2545, x = aw_373_cast_fp16)[name = tensor<string, []>("op_2707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2708_cast_fp16 = softmax(axis = var_2545, x = aw_375_cast_fp16)[name = tensor<string, []>("op_2708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2709_cast_fp16 = softmax(axis = var_2545, x = aw_377_cast_fp16)[name = tensor<string, []>("op_2709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2710_cast_fp16 = softmax(axis = var_2545, x = aw_379_cast_fp16)[name = tensor<string, []>("op_2710_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2711_cast_fp16 = softmax(axis = var_2545, x = aw_381_cast_fp16)[name = tensor<string, []>("op_2711_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2712_cast_fp16 = softmax(axis = var_2545, x = aw_383_cast_fp16)[name = tensor<string, []>("op_2712_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2713_cast_fp16 = softmax(axis = var_2545, x = aw_385_cast_fp16)[name = tensor<string, []>("op_2713_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2714_cast_fp16 = softmax(axis = var_2545, x = aw_387_cast_fp16)[name = tensor<string, []>("op_2714_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2715_cast_fp16 = softmax(axis = var_2545, x = aw_389_cast_fp16)[name = tensor<string, []>("op_2715_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2716_cast_fp16 = softmax(axis = var_2545, x = aw_391_cast_fp16)[name = tensor<string, []>("op_2716_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2717_cast_fp16 = softmax(axis = var_2545, x = aw_393_cast_fp16)[name = tensor<string, []>("op_2717_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2718_cast_fp16 = softmax(axis = var_2545, x = aw_395_cast_fp16)[name = tensor<string, []>("op_2718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2719_cast_fp16 = softmax(axis = var_2545, x = aw_397_cast_fp16)[name = tensor<string, []>("op_2719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2720_cast_fp16 = softmax(axis = var_2545, x = aw_399_cast_fp16)[name = tensor<string, []>("op_2720_cast_fp16")];
+            tensor<string, []> var_2722_equation_0 = const()[name = tensor<string, []>("op_2722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2640_cast_fp16_0, var_2701_cast_fp16))[name = tensor<string, []>("op_2722_cast_fp16")];
+            tensor<string, []> var_2724_equation_0 = const()[name = tensor<string, []>("op_2724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2640_cast_fp16_1, var_2702_cast_fp16))[name = tensor<string, []>("op_2724_cast_fp16")];
+            tensor<string, []> var_2726_equation_0 = const()[name = tensor<string, []>("op_2726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2640_cast_fp16_2, var_2703_cast_fp16))[name = tensor<string, []>("op_2726_cast_fp16")];
+            tensor<string, []> var_2728_equation_0 = const()[name = tensor<string, []>("op_2728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2728_cast_fp16 = einsum(equation = var_2728_equation_0, values = (var_2640_cast_fp16_3, var_2704_cast_fp16))[name = tensor<string, []>("op_2728_cast_fp16")];
+            tensor<string, []> var_2730_equation_0 = const()[name = tensor<string, []>("op_2730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2730_cast_fp16 = einsum(equation = var_2730_equation_0, values = (var_2640_cast_fp16_4, var_2705_cast_fp16))[name = tensor<string, []>("op_2730_cast_fp16")];
+            tensor<string, []> var_2732_equation_0 = const()[name = tensor<string, []>("op_2732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2732_cast_fp16 = einsum(equation = var_2732_equation_0, values = (var_2640_cast_fp16_5, var_2706_cast_fp16))[name = tensor<string, []>("op_2732_cast_fp16")];
+            tensor<string, []> var_2734_equation_0 = const()[name = tensor<string, []>("op_2734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2734_cast_fp16 = einsum(equation = var_2734_equation_0, values = (var_2640_cast_fp16_6, var_2707_cast_fp16))[name = tensor<string, []>("op_2734_cast_fp16")];
+            tensor<string, []> var_2736_equation_0 = const()[name = tensor<string, []>("op_2736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2736_cast_fp16 = einsum(equation = var_2736_equation_0, values = (var_2640_cast_fp16_7, var_2708_cast_fp16))[name = tensor<string, []>("op_2736_cast_fp16")];
+            tensor<string, []> var_2738_equation_0 = const()[name = tensor<string, []>("op_2738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2738_cast_fp16 = einsum(equation = var_2738_equation_0, values = (var_2640_cast_fp16_8, var_2709_cast_fp16))[name = tensor<string, []>("op_2738_cast_fp16")];
+            tensor<string, []> var_2740_equation_0 = const()[name = tensor<string, []>("op_2740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2740_cast_fp16 = einsum(equation = var_2740_equation_0, values = (var_2640_cast_fp16_9, var_2710_cast_fp16))[name = tensor<string, []>("op_2740_cast_fp16")];
+            tensor<string, []> var_2742_equation_0 = const()[name = tensor<string, []>("op_2742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2742_cast_fp16 = einsum(equation = var_2742_equation_0, values = (var_2640_cast_fp16_10, var_2711_cast_fp16))[name = tensor<string, []>("op_2742_cast_fp16")];
+            tensor<string, []> var_2744_equation_0 = const()[name = tensor<string, []>("op_2744_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2744_cast_fp16 = einsum(equation = var_2744_equation_0, values = (var_2640_cast_fp16_11, var_2712_cast_fp16))[name = tensor<string, []>("op_2744_cast_fp16")];
+            tensor<string, []> var_2746_equation_0 = const()[name = tensor<string, []>("op_2746_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2746_cast_fp16 = einsum(equation = var_2746_equation_0, values = (var_2640_cast_fp16_12, var_2713_cast_fp16))[name = tensor<string, []>("op_2746_cast_fp16")];
+            tensor<string, []> var_2748_equation_0 = const()[name = tensor<string, []>("op_2748_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2748_cast_fp16 = einsum(equation = var_2748_equation_0, values = (var_2640_cast_fp16_13, var_2714_cast_fp16))[name = tensor<string, []>("op_2748_cast_fp16")];
+            tensor<string, []> var_2750_equation_0 = const()[name = tensor<string, []>("op_2750_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2750_cast_fp16 = einsum(equation = var_2750_equation_0, values = (var_2640_cast_fp16_14, var_2715_cast_fp16))[name = tensor<string, []>("op_2750_cast_fp16")];
+            tensor<string, []> var_2752_equation_0 = const()[name = tensor<string, []>("op_2752_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2752_cast_fp16 = einsum(equation = var_2752_equation_0, values = (var_2640_cast_fp16_15, var_2716_cast_fp16))[name = tensor<string, []>("op_2752_cast_fp16")];
+            tensor<string, []> var_2754_equation_0 = const()[name = tensor<string, []>("op_2754_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2754_cast_fp16 = einsum(equation = var_2754_equation_0, values = (var_2640_cast_fp16_16, var_2717_cast_fp16))[name = tensor<string, []>("op_2754_cast_fp16")];
+            tensor<string, []> var_2756_equation_0 = const()[name = tensor<string, []>("op_2756_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2756_cast_fp16 = einsum(equation = var_2756_equation_0, values = (var_2640_cast_fp16_17, var_2718_cast_fp16))[name = tensor<string, []>("op_2756_cast_fp16")];
+            tensor<string, []> var_2758_equation_0 = const()[name = tensor<string, []>("op_2758_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2758_cast_fp16 = einsum(equation = var_2758_equation_0, values = (var_2640_cast_fp16_18, var_2719_cast_fp16))[name = tensor<string, []>("op_2758_cast_fp16")];
+            tensor<string, []> var_2760_equation_0 = const()[name = tensor<string, []>("op_2760_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2760_cast_fp16 = einsum(equation = var_2760_equation_0, values = (var_2640_cast_fp16_19, var_2720_cast_fp16))[name = tensor<string, []>("op_2760_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_95_cast_fp16 = concat(axis = var_2545, interleave = input_95_interleave_0, values = (var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16, var_2728_cast_fp16, var_2730_cast_fp16, var_2732_cast_fp16, var_2734_cast_fp16, var_2736_cast_fp16, var_2738_cast_fp16, var_2740_cast_fp16, var_2742_cast_fp16, var_2744_cast_fp16, var_2746_cast_fp16, var_2748_cast_fp16, var_2750_cast_fp16, var_2752_cast_fp16, var_2754_cast_fp16, var_2756_cast_fp16, var_2758_cast_fp16, var_2760_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2769_pad_type_0 = const()[name = tensor<string, []>("op_2769_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2769_strides_0 = const()[name = tensor<string, []>("op_2769_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2769_pad_0 = const()[name = tensor<string, []>("op_2769_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2769_dilations_0 = const()[name = tensor<string, []>("op_2769_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2769_groups_0 = const()[name = tensor<string, []>("op_2769_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378310912)))];
+            tensor<fp16, [1280]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381587776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2769_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2769_dilations_0, groups = var_2769_groups_0, pad = var_2769_pad_0, pad_type = var_2769_pad_type_0, strides = var_2769_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2769_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2769_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381590400)))];
+            tensor<fp16, [1280]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381593024)))];
+            tensor<fp16, []> var_2779_to_fp16 = const()[name = tensor<string, []>("op_2779_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2779_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381595648)))];
+            tensor<fp16, [5120]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394702912)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2805_pad_type_0 = const()[name = tensor<string, []>("op_2805_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2805_strides_0 = const()[name = tensor<string, []>("op_2805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2805_pad_0 = const()[name = tensor<string, []>("op_2805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2805_dilations_0 = const()[name = tensor<string, []>("op_2805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2805_groups_0 = const()[name = tensor<string, []>("op_2805_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394713216)))];
+            tensor<fp16, [1280]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407820480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2805_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2805_dilations_0, groups = var_2805_groups_0, pad = var_2805_pad_0, pad_type = var_2805_pad_type_0, strides = var_2805_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2805_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2805_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2814 = const()[name = tensor<string, []>("op_2814"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407823104)))];
+            tensor<fp16, [1280]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407825728)))];
+            tensor<fp16, []> var_2830_to_fp16 = const()[name = tensor<string, []>("op_2830_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2830_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2865_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2865_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(407828352)))];
+            tensor<fp16, [1280]> var_2865_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2865_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411105216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2865_cast_fp16 = conv(bias = var_2865_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2865_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411107840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2863_pad_type_0 = const()[name = tensor<string, []>("op_2863_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2863_strides_0 = const()[name = tensor<string, []>("op_2863_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2863_pad_0 = const()[name = tensor<string, []>("op_2863_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2863_dilations_0 = const()[name = tensor<string, []>("op_2863_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2863_groups_0 = const()[name = tensor<string, []>("op_2863_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(414384704)))];
+            tensor<fp16, [1280]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417661568)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2863_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2863_dilations_0, groups = var_2863_groups_0, pad = var_2863_pad_0, pad_type = var_2863_pad_type_0, strides = var_2863_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<int32, [20]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2866_axis_0 = const()[name = tensor<string, []>("op_2866_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_19 = split(axis = var_2866_axis_0, split_sizes = tile_30, x = var_2865_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<int32, [4]> var_2887_perm_0 = const()[name = tensor<string, []>("op_2887_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2888_axis_0 = const()[name = tensor<string, []>("op_2888_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2887_cast_fp16 = transpose(perm = var_2887_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_22")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_19 = split(axis = var_2888_axis_0, split_sizes = tile_31, x = var_2887_cast_fp16)[name = tensor<string, []>("op_2888_cast_fp16")];
+            tensor<int32, [20]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2909_axis_0 = const()[name = tensor<string, []>("op_2909_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_19 = split(axis = var_2909_axis_0, split_sizes = tile_32, x = var_2863_cast_fp16)[name = tensor<string, []>("op_2909_cast_fp16")];
+            tensor<string, []> aw_401_equation_0 = const()[name = tensor<string, []>("aw_401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_401_cast_fp16 = einsum(equation = aw_401_equation_0, values = (var_2888_cast_fp16_0, var_2866_cast_fp16_0))[name = tensor<string, []>("aw_401_cast_fp16")];
+            tensor<string, []> aw_403_equation_0 = const()[name = tensor<string, []>("aw_403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_403_cast_fp16 = einsum(equation = aw_403_equation_0, values = (var_2888_cast_fp16_1, var_2866_cast_fp16_1))[name = tensor<string, []>("aw_403_cast_fp16")];
+            tensor<string, []> aw_405_equation_0 = const()[name = tensor<string, []>("aw_405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_405_cast_fp16 = einsum(equation = aw_405_equation_0, values = (var_2888_cast_fp16_2, var_2866_cast_fp16_2))[name = tensor<string, []>("aw_405_cast_fp16")];
+            tensor<string, []> aw_407_equation_0 = const()[name = tensor<string, []>("aw_407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_407_cast_fp16 = einsum(equation = aw_407_equation_0, values = (var_2888_cast_fp16_3, var_2866_cast_fp16_3))[name = tensor<string, []>("aw_407_cast_fp16")];
+            tensor<string, []> aw_409_equation_0 = const()[name = tensor<string, []>("aw_409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_409_cast_fp16 = einsum(equation = aw_409_equation_0, values = (var_2888_cast_fp16_4, var_2866_cast_fp16_4))[name = tensor<string, []>("aw_409_cast_fp16")];
+            tensor<string, []> aw_411_equation_0 = const()[name = tensor<string, []>("aw_411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_411_cast_fp16 = einsum(equation = aw_411_equation_0, values = (var_2888_cast_fp16_5, var_2866_cast_fp16_5))[name = tensor<string, []>("aw_411_cast_fp16")];
+            tensor<string, []> aw_413_equation_0 = const()[name = tensor<string, []>("aw_413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_413_cast_fp16 = einsum(equation = aw_413_equation_0, values = (var_2888_cast_fp16_6, var_2866_cast_fp16_6))[name = tensor<string, []>("aw_413_cast_fp16")];
+            tensor<string, []> aw_415_equation_0 = const()[name = tensor<string, []>("aw_415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_415_cast_fp16 = einsum(equation = aw_415_equation_0, values = (var_2888_cast_fp16_7, var_2866_cast_fp16_7))[name = tensor<string, []>("aw_415_cast_fp16")];
+            tensor<string, []> aw_417_equation_0 = const()[name = tensor<string, []>("aw_417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_417_cast_fp16 = einsum(equation = aw_417_equation_0, values = (var_2888_cast_fp16_8, var_2866_cast_fp16_8))[name = tensor<string, []>("aw_417_cast_fp16")];
+            tensor<string, []> aw_419_equation_0 = const()[name = tensor<string, []>("aw_419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_419_cast_fp16 = einsum(equation = aw_419_equation_0, values = (var_2888_cast_fp16_9, var_2866_cast_fp16_9))[name = tensor<string, []>("aw_419_cast_fp16")];
+            tensor<string, []> aw_421_equation_0 = const()[name = tensor<string, []>("aw_421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_421_cast_fp16 = einsum(equation = aw_421_equation_0, values = (var_2888_cast_fp16_10, var_2866_cast_fp16_10))[name = tensor<string, []>("aw_421_cast_fp16")];
+            tensor<string, []> aw_423_equation_0 = const()[name = tensor<string, []>("aw_423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_423_cast_fp16 = einsum(equation = aw_423_equation_0, values = (var_2888_cast_fp16_11, var_2866_cast_fp16_11))[name = tensor<string, []>("aw_423_cast_fp16")];
+            tensor<string, []> aw_425_equation_0 = const()[name = tensor<string, []>("aw_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_425_cast_fp16 = einsum(equation = aw_425_equation_0, values = (var_2888_cast_fp16_12, var_2866_cast_fp16_12))[name = tensor<string, []>("aw_425_cast_fp16")];
+            tensor<string, []> aw_427_equation_0 = const()[name = tensor<string, []>("aw_427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_427_cast_fp16 = einsum(equation = aw_427_equation_0, values = (var_2888_cast_fp16_13, var_2866_cast_fp16_13))[name = tensor<string, []>("aw_427_cast_fp16")];
+            tensor<string, []> aw_429_equation_0 = const()[name = tensor<string, []>("aw_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_429_cast_fp16 = einsum(equation = aw_429_equation_0, values = (var_2888_cast_fp16_14, var_2866_cast_fp16_14))[name = tensor<string, []>("aw_429_cast_fp16")];
+            tensor<string, []> aw_431_equation_0 = const()[name = tensor<string, []>("aw_431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_431_cast_fp16 = einsum(equation = aw_431_equation_0, values = (var_2888_cast_fp16_15, var_2866_cast_fp16_15))[name = tensor<string, []>("aw_431_cast_fp16")];
+            tensor<string, []> aw_433_equation_0 = const()[name = tensor<string, []>("aw_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_433_cast_fp16 = einsum(equation = aw_433_equation_0, values = (var_2888_cast_fp16_16, var_2866_cast_fp16_16))[name = tensor<string, []>("aw_433_cast_fp16")];
+            tensor<string, []> aw_435_equation_0 = const()[name = tensor<string, []>("aw_435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_435_cast_fp16 = einsum(equation = aw_435_equation_0, values = (var_2888_cast_fp16_17, var_2866_cast_fp16_17))[name = tensor<string, []>("aw_435_cast_fp16")];
+            tensor<string, []> aw_437_equation_0 = const()[name = tensor<string, []>("aw_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_437_cast_fp16 = einsum(equation = aw_437_equation_0, values = (var_2888_cast_fp16_18, var_2866_cast_fp16_18))[name = tensor<string, []>("aw_437_cast_fp16")];
+            tensor<string, []> aw_439_equation_0 = const()[name = tensor<string, []>("aw_439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_439_cast_fp16 = einsum(equation = aw_439_equation_0, values = (var_2888_cast_fp16_19, var_2866_cast_fp16_19))[name = tensor<string, []>("aw_439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2970_cast_fp16 = softmax(axis = var_2814, x = aw_401_cast_fp16)[name = tensor<string, []>("op_2970_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2971_cast_fp16 = softmax(axis = var_2814, x = aw_403_cast_fp16)[name = tensor<string, []>("op_2971_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2972_cast_fp16 = softmax(axis = var_2814, x = aw_405_cast_fp16)[name = tensor<string, []>("op_2972_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2973_cast_fp16 = softmax(axis = var_2814, x = aw_407_cast_fp16)[name = tensor<string, []>("op_2973_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2974_cast_fp16 = softmax(axis = var_2814, x = aw_409_cast_fp16)[name = tensor<string, []>("op_2974_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2975_cast_fp16 = softmax(axis = var_2814, x = aw_411_cast_fp16)[name = tensor<string, []>("op_2975_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2976_cast_fp16 = softmax(axis = var_2814, x = aw_413_cast_fp16)[name = tensor<string, []>("op_2976_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2977_cast_fp16 = softmax(axis = var_2814, x = aw_415_cast_fp16)[name = tensor<string, []>("op_2977_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2978_cast_fp16 = softmax(axis = var_2814, x = aw_417_cast_fp16)[name = tensor<string, []>("op_2978_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2979_cast_fp16 = softmax(axis = var_2814, x = aw_419_cast_fp16)[name = tensor<string, []>("op_2979_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2980_cast_fp16 = softmax(axis = var_2814, x = aw_421_cast_fp16)[name = tensor<string, []>("op_2980_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2981_cast_fp16 = softmax(axis = var_2814, x = aw_423_cast_fp16)[name = tensor<string, []>("op_2981_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2982_cast_fp16 = softmax(axis = var_2814, x = aw_425_cast_fp16)[name = tensor<string, []>("op_2982_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2983_cast_fp16 = softmax(axis = var_2814, x = aw_427_cast_fp16)[name = tensor<string, []>("op_2983_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2984_cast_fp16 = softmax(axis = var_2814, x = aw_429_cast_fp16)[name = tensor<string, []>("op_2984_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2985_cast_fp16 = softmax(axis = var_2814, x = aw_431_cast_fp16)[name = tensor<string, []>("op_2985_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2986_cast_fp16 = softmax(axis = var_2814, x = aw_433_cast_fp16)[name = tensor<string, []>("op_2986_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2987_cast_fp16 = softmax(axis = var_2814, x = aw_435_cast_fp16)[name = tensor<string, []>("op_2987_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2988_cast_fp16 = softmax(axis = var_2814, x = aw_437_cast_fp16)[name = tensor<string, []>("op_2988_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2989_cast_fp16 = softmax(axis = var_2814, x = aw_439_cast_fp16)[name = tensor<string, []>("op_2989_cast_fp16")];
+            tensor<string, []> var_2991_equation_0 = const()[name = tensor<string, []>("op_2991_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2991_cast_fp16 = einsum(equation = var_2991_equation_0, values = (var_2909_cast_fp16_0, var_2970_cast_fp16))[name = tensor<string, []>("op_2991_cast_fp16")];
+            tensor<string, []> var_2993_equation_0 = const()[name = tensor<string, []>("op_2993_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2993_cast_fp16 = einsum(equation = var_2993_equation_0, values = (var_2909_cast_fp16_1, var_2971_cast_fp16))[name = tensor<string, []>("op_2993_cast_fp16")];
+            tensor<string, []> var_2995_equation_0 = const()[name = tensor<string, []>("op_2995_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2995_cast_fp16 = einsum(equation = var_2995_equation_0, values = (var_2909_cast_fp16_2, var_2972_cast_fp16))[name = tensor<string, []>("op_2995_cast_fp16")];
+            tensor<string, []> var_2997_equation_0 = const()[name = tensor<string, []>("op_2997_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2997_cast_fp16 = einsum(equation = var_2997_equation_0, values = (var_2909_cast_fp16_3, var_2973_cast_fp16))[name = tensor<string, []>("op_2997_cast_fp16")];
+            tensor<string, []> var_2999_equation_0 = const()[name = tensor<string, []>("op_2999_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2999_cast_fp16 = einsum(equation = var_2999_equation_0, values = (var_2909_cast_fp16_4, var_2974_cast_fp16))[name = tensor<string, []>("op_2999_cast_fp16")];
+            tensor<string, []> var_3001_equation_0 = const()[name = tensor<string, []>("op_3001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3001_cast_fp16 = einsum(equation = var_3001_equation_0, values = (var_2909_cast_fp16_5, var_2975_cast_fp16))[name = tensor<string, []>("op_3001_cast_fp16")];
+            tensor<string, []> var_3003_equation_0 = const()[name = tensor<string, []>("op_3003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3003_cast_fp16 = einsum(equation = var_3003_equation_0, values = (var_2909_cast_fp16_6, var_2976_cast_fp16))[name = tensor<string, []>("op_3003_cast_fp16")];
+            tensor<string, []> var_3005_equation_0 = const()[name = tensor<string, []>("op_3005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3005_cast_fp16 = einsum(equation = var_3005_equation_0, values = (var_2909_cast_fp16_7, var_2977_cast_fp16))[name = tensor<string, []>("op_3005_cast_fp16")];
+            tensor<string, []> var_3007_equation_0 = const()[name = tensor<string, []>("op_3007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3007_cast_fp16 = einsum(equation = var_3007_equation_0, values = (var_2909_cast_fp16_8, var_2978_cast_fp16))[name = tensor<string, []>("op_3007_cast_fp16")];
+            tensor<string, []> var_3009_equation_0 = const()[name = tensor<string, []>("op_3009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3009_cast_fp16 = einsum(equation = var_3009_equation_0, values = (var_2909_cast_fp16_9, var_2979_cast_fp16))[name = tensor<string, []>("op_3009_cast_fp16")];
+            tensor<string, []> var_3011_equation_0 = const()[name = tensor<string, []>("op_3011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3011_cast_fp16 = einsum(equation = var_3011_equation_0, values = (var_2909_cast_fp16_10, var_2980_cast_fp16))[name = tensor<string, []>("op_3011_cast_fp16")];
+            tensor<string, []> var_3013_equation_0 = const()[name = tensor<string, []>("op_3013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3013_cast_fp16 = einsum(equation = var_3013_equation_0, values = (var_2909_cast_fp16_11, var_2981_cast_fp16))[name = tensor<string, []>("op_3013_cast_fp16")];
+            tensor<string, []> var_3015_equation_0 = const()[name = tensor<string, []>("op_3015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3015_cast_fp16 = einsum(equation = var_3015_equation_0, values = (var_2909_cast_fp16_12, var_2982_cast_fp16))[name = tensor<string, []>("op_3015_cast_fp16")];
+            tensor<string, []> var_3017_equation_0 = const()[name = tensor<string, []>("op_3017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3017_cast_fp16 = einsum(equation = var_3017_equation_0, values = (var_2909_cast_fp16_13, var_2983_cast_fp16))[name = tensor<string, []>("op_3017_cast_fp16")];
+            tensor<string, []> var_3019_equation_0 = const()[name = tensor<string, []>("op_3019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3019_cast_fp16 = einsum(equation = var_3019_equation_0, values = (var_2909_cast_fp16_14, var_2984_cast_fp16))[name = tensor<string, []>("op_3019_cast_fp16")];
+            tensor<string, []> var_3021_equation_0 = const()[name = tensor<string, []>("op_3021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3021_cast_fp16 = einsum(equation = var_3021_equation_0, values = (var_2909_cast_fp16_15, var_2985_cast_fp16))[name = tensor<string, []>("op_3021_cast_fp16")];
+            tensor<string, []> var_3023_equation_0 = const()[name = tensor<string, []>("op_3023_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3023_cast_fp16 = einsum(equation = var_3023_equation_0, values = (var_2909_cast_fp16_16, var_2986_cast_fp16))[name = tensor<string, []>("op_3023_cast_fp16")];
+            tensor<string, []> var_3025_equation_0 = const()[name = tensor<string, []>("op_3025_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3025_cast_fp16 = einsum(equation = var_3025_equation_0, values = (var_2909_cast_fp16_17, var_2987_cast_fp16))[name = tensor<string, []>("op_3025_cast_fp16")];
+            tensor<string, []> var_3027_equation_0 = const()[name = tensor<string, []>("op_3027_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3027_cast_fp16 = einsum(equation = var_3027_equation_0, values = (var_2909_cast_fp16_18, var_2988_cast_fp16))[name = tensor<string, []>("op_3027_cast_fp16")];
+            tensor<string, []> var_3029_equation_0 = const()[name = tensor<string, []>("op_3029_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3029_cast_fp16 = einsum(equation = var_3029_equation_0, values = (var_2909_cast_fp16_19, var_2989_cast_fp16))[name = tensor<string, []>("op_3029_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2814, interleave = input_105_interleave_0, values = (var_2991_cast_fp16, var_2993_cast_fp16, var_2995_cast_fp16, var_2997_cast_fp16, var_2999_cast_fp16, var_3001_cast_fp16, var_3003_cast_fp16, var_3005_cast_fp16, var_3007_cast_fp16, var_3009_cast_fp16, var_3011_cast_fp16, var_3013_cast_fp16, var_3015_cast_fp16, var_3017_cast_fp16, var_3019_cast_fp16, var_3021_cast_fp16, var_3023_cast_fp16, var_3025_cast_fp16, var_3027_cast_fp16, var_3029_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_3038_pad_type_0 = const()[name = tensor<string, []>("op_3038_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3038_strides_0 = const()[name = tensor<string, []>("op_3038_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3038_pad_0 = const()[name = tensor<string, []>("op_3038_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3038_dilations_0 = const()[name = tensor<string, []>("op_3038_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3038_groups_0 = const()[name = tensor<string, []>("op_3038_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417664192)))];
+            tensor<fp16, [1280]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420941056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3038_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_3038_dilations_0, groups = var_3038_groups_0, pad = var_3038_pad_0, pad_type = var_3038_pad_type_0, strides = var_3038_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_3038_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_3038_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420943680)))];
+            tensor<fp16, [1280]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420946304)))];
+            tensor<fp16, []> var_3048_to_fp16 = const()[name = tensor<string, []>("op_3048_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_3048_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(420948928)))];
+            tensor<fp16, [5120]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434056192)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_3074_pad_type_0 = const()[name = tensor<string, []>("op_3074_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3074_strides_0 = const()[name = tensor<string, []>("op_3074_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3074_pad_0 = const()[name = tensor<string, []>("op_3074_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3074_dilations_0 = const()[name = tensor<string, []>("op_3074_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3074_groups_0 = const()[name = tensor<string, []>("op_3074_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434066496)))];
+            tensor<fp16, [1280]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447173760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3074_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_3074_dilations_0, groups = var_3074_groups_0, pad = var_3074_pad_0, pad_type = var_3074_pad_type_0, strides = var_3074_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_3074_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_3074_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_3083 = const()[name = tensor<string, []>("op_3083"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447176384)))];
+            tensor<fp16, [1280]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447179008)))];
+            tensor<fp16, []> var_3099_to_fp16 = const()[name = tensor<string, []>("op_3099_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_3099_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_23_pad_type_0 = const()[name = tensor<string, []>("q_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_23_strides_0 = const()[name = tensor<string, []>("q_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_23_pad_0 = const()[name = tensor<string, []>("q_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_23_dilations_0 = const()[name = tensor<string, []>("q_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_23_groups_0 = const()[name = tensor<string, []>("q_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3134_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3134_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447181632)))];
+            tensor<fp16, [1280]> var_3134_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3134_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450458496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3134_cast_fp16 = conv(bias = var_3134_bias_0_to_fp16, dilations = q_23_dilations_0, groups = q_23_groups_0, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = q_23_strides_0, weight = var_3134_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3134_cast_fp16")];
+            tensor<string, []> k_23_pad_type_0 = const()[name = tensor<string, []>("k_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_23_strides_0 = const()[name = tensor<string, []>("k_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_23_pad_0 = const()[name = tensor<string, []>("k_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_23_dilations_0 = const()[name = tensor<string, []>("k_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_23_groups_0 = const()[name = tensor<string, []>("k_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450461120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_23_cast_fp16 = conv(dilations = k_23_dilations_0, groups = k_23_groups_0, pad = k_23_pad_0, pad_type = k_23_pad_type_0, strides = k_23_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_23_cast_fp16")];
+            tensor<string, []> var_3132_pad_type_0 = const()[name = tensor<string, []>("op_3132_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3132_strides_0 = const()[name = tensor<string, []>("op_3132_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3132_pad_0 = const()[name = tensor<string, []>("op_3132_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3132_dilations_0 = const()[name = tensor<string, []>("op_3132_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3132_groups_0 = const()[name = tensor<string, []>("op_3132_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(453737984)))];
+            tensor<fp16, [1280]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457014848)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3132_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_3132_dilations_0, groups = var_3132_groups_0, pad = var_3132_pad_0, pad_type = var_3132_pad_type_0, strides = var_3132_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3132_cast_fp16")];
+            tensor<int32, [20]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3135_axis_0 = const()[name = tensor<string, []>("op_3135_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_19 = split(axis = var_3135_axis_0, split_sizes = tile_33, x = var_3134_cast_fp16)[name = tensor<string, []>("op_3135_cast_fp16")];
+            tensor<int32, [4]> var_3156_perm_0 = const()[name = tensor<string, []>("op_3156_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3157_axis_0 = const()[name = tensor<string, []>("op_3157_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3156_cast_fp16 = transpose(perm = var_3156_perm_0, x = k_23_cast_fp16)[name = tensor<string, []>("transpose_21")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_19 = split(axis = var_3157_axis_0, split_sizes = tile_34, x = var_3156_cast_fp16)[name = tensor<string, []>("op_3157_cast_fp16")];
+            tensor<int32, [20]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3178_axis_0 = const()[name = tensor<string, []>("op_3178_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_19 = split(axis = var_3178_axis_0, split_sizes = tile_35, x = var_3132_cast_fp16)[name = tensor<string, []>("op_3178_cast_fp16")];
+            tensor<string, []> aw_441_equation_0 = const()[name = tensor<string, []>("aw_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_441_cast_fp16 = einsum(equation = aw_441_equation_0, values = (var_3157_cast_fp16_0, var_3135_cast_fp16_0))[name = tensor<string, []>("aw_441_cast_fp16")];
+            tensor<string, []> aw_443_equation_0 = const()[name = tensor<string, []>("aw_443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_443_cast_fp16 = einsum(equation = aw_443_equation_0, values = (var_3157_cast_fp16_1, var_3135_cast_fp16_1))[name = tensor<string, []>("aw_443_cast_fp16")];
+            tensor<string, []> aw_445_equation_0 = const()[name = tensor<string, []>("aw_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_445_cast_fp16 = einsum(equation = aw_445_equation_0, values = (var_3157_cast_fp16_2, var_3135_cast_fp16_2))[name = tensor<string, []>("aw_445_cast_fp16")];
+            tensor<string, []> aw_447_equation_0 = const()[name = tensor<string, []>("aw_447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_447_cast_fp16 = einsum(equation = aw_447_equation_0, values = (var_3157_cast_fp16_3, var_3135_cast_fp16_3))[name = tensor<string, []>("aw_447_cast_fp16")];
+            tensor<string, []> aw_449_equation_0 = const()[name = tensor<string, []>("aw_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_449_cast_fp16 = einsum(equation = aw_449_equation_0, values = (var_3157_cast_fp16_4, var_3135_cast_fp16_4))[name = tensor<string, []>("aw_449_cast_fp16")];
+            tensor<string, []> aw_451_equation_0 = const()[name = tensor<string, []>("aw_451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_451_cast_fp16 = einsum(equation = aw_451_equation_0, values = (var_3157_cast_fp16_5, var_3135_cast_fp16_5))[name = tensor<string, []>("aw_451_cast_fp16")];
+            tensor<string, []> aw_453_equation_0 = const()[name = tensor<string, []>("aw_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_453_cast_fp16 = einsum(equation = aw_453_equation_0, values = (var_3157_cast_fp16_6, var_3135_cast_fp16_6))[name = tensor<string, []>("aw_453_cast_fp16")];
+            tensor<string, []> aw_455_equation_0 = const()[name = tensor<string, []>("aw_455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_455_cast_fp16 = einsum(equation = aw_455_equation_0, values = (var_3157_cast_fp16_7, var_3135_cast_fp16_7))[name = tensor<string, []>("aw_455_cast_fp16")];
+            tensor<string, []> aw_457_equation_0 = const()[name = tensor<string, []>("aw_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_457_cast_fp16 = einsum(equation = aw_457_equation_0, values = (var_3157_cast_fp16_8, var_3135_cast_fp16_8))[name = tensor<string, []>("aw_457_cast_fp16")];
+            tensor<string, []> aw_459_equation_0 = const()[name = tensor<string, []>("aw_459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_459_cast_fp16 = einsum(equation = aw_459_equation_0, values = (var_3157_cast_fp16_9, var_3135_cast_fp16_9))[name = tensor<string, []>("aw_459_cast_fp16")];
+            tensor<string, []> aw_461_equation_0 = const()[name = tensor<string, []>("aw_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_461_cast_fp16 = einsum(equation = aw_461_equation_0, values = (var_3157_cast_fp16_10, var_3135_cast_fp16_10))[name = tensor<string, []>("aw_461_cast_fp16")];
+            tensor<string, []> aw_463_equation_0 = const()[name = tensor<string, []>("aw_463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_463_cast_fp16 = einsum(equation = aw_463_equation_0, values = (var_3157_cast_fp16_11, var_3135_cast_fp16_11))[name = tensor<string, []>("aw_463_cast_fp16")];
+            tensor<string, []> aw_465_equation_0 = const()[name = tensor<string, []>("aw_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_465_cast_fp16 = einsum(equation = aw_465_equation_0, values = (var_3157_cast_fp16_12, var_3135_cast_fp16_12))[name = tensor<string, []>("aw_465_cast_fp16")];
+            tensor<string, []> aw_467_equation_0 = const()[name = tensor<string, []>("aw_467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_467_cast_fp16 = einsum(equation = aw_467_equation_0, values = (var_3157_cast_fp16_13, var_3135_cast_fp16_13))[name = tensor<string, []>("aw_467_cast_fp16")];
+            tensor<string, []> aw_469_equation_0 = const()[name = tensor<string, []>("aw_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_469_cast_fp16 = einsum(equation = aw_469_equation_0, values = (var_3157_cast_fp16_14, var_3135_cast_fp16_14))[name = tensor<string, []>("aw_469_cast_fp16")];
+            tensor<string, []> aw_471_equation_0 = const()[name = tensor<string, []>("aw_471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_471_cast_fp16 = einsum(equation = aw_471_equation_0, values = (var_3157_cast_fp16_15, var_3135_cast_fp16_15))[name = tensor<string, []>("aw_471_cast_fp16")];
+            tensor<string, []> aw_473_equation_0 = const()[name = tensor<string, []>("aw_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_473_cast_fp16 = einsum(equation = aw_473_equation_0, values = (var_3157_cast_fp16_16, var_3135_cast_fp16_16))[name = tensor<string, []>("aw_473_cast_fp16")];
+            tensor<string, []> aw_475_equation_0 = const()[name = tensor<string, []>("aw_475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_475_cast_fp16 = einsum(equation = aw_475_equation_0, values = (var_3157_cast_fp16_17, var_3135_cast_fp16_17))[name = tensor<string, []>("aw_475_cast_fp16")];
+            tensor<string, []> aw_477_equation_0 = const()[name = tensor<string, []>("aw_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_477_cast_fp16 = einsum(equation = aw_477_equation_0, values = (var_3157_cast_fp16_18, var_3135_cast_fp16_18))[name = tensor<string, []>("aw_477_cast_fp16")];
+            tensor<string, []> aw_479_equation_0 = const()[name = tensor<string, []>("aw_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_479_cast_fp16 = einsum(equation = aw_479_equation_0, values = (var_3157_cast_fp16_19, var_3135_cast_fp16_19))[name = tensor<string, []>("aw_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3239_cast_fp16 = softmax(axis = var_3083, x = aw_441_cast_fp16)[name = tensor<string, []>("op_3239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3240_cast_fp16 = softmax(axis = var_3083, x = aw_443_cast_fp16)[name = tensor<string, []>("op_3240_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3241_cast_fp16 = softmax(axis = var_3083, x = aw_445_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3242_cast_fp16 = softmax(axis = var_3083, x = aw_447_cast_fp16)[name = tensor<string, []>("op_3242_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3243_cast_fp16 = softmax(axis = var_3083, x = aw_449_cast_fp16)[name = tensor<string, []>("op_3243_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3244_cast_fp16 = softmax(axis = var_3083, x = aw_451_cast_fp16)[name = tensor<string, []>("op_3244_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3245_cast_fp16 = softmax(axis = var_3083, x = aw_453_cast_fp16)[name = tensor<string, []>("op_3245_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3246_cast_fp16 = softmax(axis = var_3083, x = aw_455_cast_fp16)[name = tensor<string, []>("op_3246_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3247_cast_fp16 = softmax(axis = var_3083, x = aw_457_cast_fp16)[name = tensor<string, []>("op_3247_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3248_cast_fp16 = softmax(axis = var_3083, x = aw_459_cast_fp16)[name = tensor<string, []>("op_3248_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3249_cast_fp16 = softmax(axis = var_3083, x = aw_461_cast_fp16)[name = tensor<string, []>("op_3249_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3250_cast_fp16 = softmax(axis = var_3083, x = aw_463_cast_fp16)[name = tensor<string, []>("op_3250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3251_cast_fp16 = softmax(axis = var_3083, x = aw_465_cast_fp16)[name = tensor<string, []>("op_3251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3252_cast_fp16 = softmax(axis = var_3083, x = aw_467_cast_fp16)[name = tensor<string, []>("op_3252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3253_cast_fp16 = softmax(axis = var_3083, x = aw_469_cast_fp16)[name = tensor<string, []>("op_3253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3254_cast_fp16 = softmax(axis = var_3083, x = aw_471_cast_fp16)[name = tensor<string, []>("op_3254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3255_cast_fp16 = softmax(axis = var_3083, x = aw_473_cast_fp16)[name = tensor<string, []>("op_3255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3256_cast_fp16 = softmax(axis = var_3083, x = aw_475_cast_fp16)[name = tensor<string, []>("op_3256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3257_cast_fp16 = softmax(axis = var_3083, x = aw_477_cast_fp16)[name = tensor<string, []>("op_3257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3258_cast_fp16 = softmax(axis = var_3083, x = aw_479_cast_fp16)[name = tensor<string, []>("op_3258_cast_fp16")];
+            tensor<string, []> var_3260_equation_0 = const()[name = tensor<string, []>("op_3260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3260_cast_fp16 = einsum(equation = var_3260_equation_0, values = (var_3178_cast_fp16_0, var_3239_cast_fp16))[name = tensor<string, []>("op_3260_cast_fp16")];
+            tensor<string, []> var_3262_equation_0 = const()[name = tensor<string, []>("op_3262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3262_cast_fp16 = einsum(equation = var_3262_equation_0, values = (var_3178_cast_fp16_1, var_3240_cast_fp16))[name = tensor<string, []>("op_3262_cast_fp16")];
+            tensor<string, []> var_3264_equation_0 = const()[name = tensor<string, []>("op_3264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3264_cast_fp16 = einsum(equation = var_3264_equation_0, values = (var_3178_cast_fp16_2, var_3241_cast_fp16))[name = tensor<string, []>("op_3264_cast_fp16")];
+            tensor<string, []> var_3266_equation_0 = const()[name = tensor<string, []>("op_3266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3266_cast_fp16 = einsum(equation = var_3266_equation_0, values = (var_3178_cast_fp16_3, var_3242_cast_fp16))[name = tensor<string, []>("op_3266_cast_fp16")];
+            tensor<string, []> var_3268_equation_0 = const()[name = tensor<string, []>("op_3268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3268_cast_fp16 = einsum(equation = var_3268_equation_0, values = (var_3178_cast_fp16_4, var_3243_cast_fp16))[name = tensor<string, []>("op_3268_cast_fp16")];
+            tensor<string, []> var_3270_equation_0 = const()[name = tensor<string, []>("op_3270_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3270_cast_fp16 = einsum(equation = var_3270_equation_0, values = (var_3178_cast_fp16_5, var_3244_cast_fp16))[name = tensor<string, []>("op_3270_cast_fp16")];
+            tensor<string, []> var_3272_equation_0 = const()[name = tensor<string, []>("op_3272_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3272_cast_fp16 = einsum(equation = var_3272_equation_0, values = (var_3178_cast_fp16_6, var_3245_cast_fp16))[name = tensor<string, []>("op_3272_cast_fp16")];
+            tensor<string, []> var_3274_equation_0 = const()[name = tensor<string, []>("op_3274_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3274_cast_fp16 = einsum(equation = var_3274_equation_0, values = (var_3178_cast_fp16_7, var_3246_cast_fp16))[name = tensor<string, []>("op_3274_cast_fp16")];
+            tensor<string, []> var_3276_equation_0 = const()[name = tensor<string, []>("op_3276_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16 = einsum(equation = var_3276_equation_0, values = (var_3178_cast_fp16_8, var_3247_cast_fp16))[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<string, []> var_3278_equation_0 = const()[name = tensor<string, []>("op_3278_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3278_cast_fp16 = einsum(equation = var_3278_equation_0, values = (var_3178_cast_fp16_9, var_3248_cast_fp16))[name = tensor<string, []>("op_3278_cast_fp16")];
+            tensor<string, []> var_3280_equation_0 = const()[name = tensor<string, []>("op_3280_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3280_cast_fp16 = einsum(equation = var_3280_equation_0, values = (var_3178_cast_fp16_10, var_3249_cast_fp16))[name = tensor<string, []>("op_3280_cast_fp16")];
+            tensor<string, []> var_3282_equation_0 = const()[name = tensor<string, []>("op_3282_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3282_cast_fp16 = einsum(equation = var_3282_equation_0, values = (var_3178_cast_fp16_11, var_3250_cast_fp16))[name = tensor<string, []>("op_3282_cast_fp16")];
+            tensor<string, []> var_3284_equation_0 = const()[name = tensor<string, []>("op_3284_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3284_cast_fp16 = einsum(equation = var_3284_equation_0, values = (var_3178_cast_fp16_12, var_3251_cast_fp16))[name = tensor<string, []>("op_3284_cast_fp16")];
+            tensor<string, []> var_3286_equation_0 = const()[name = tensor<string, []>("op_3286_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3286_cast_fp16 = einsum(equation = var_3286_equation_0, values = (var_3178_cast_fp16_13, var_3252_cast_fp16))[name = tensor<string, []>("op_3286_cast_fp16")];
+            tensor<string, []> var_3288_equation_0 = const()[name = tensor<string, []>("op_3288_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3288_cast_fp16 = einsum(equation = var_3288_equation_0, values = (var_3178_cast_fp16_14, var_3253_cast_fp16))[name = tensor<string, []>("op_3288_cast_fp16")];
+            tensor<string, []> var_3290_equation_0 = const()[name = tensor<string, []>("op_3290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3290_cast_fp16 = einsum(equation = var_3290_equation_0, values = (var_3178_cast_fp16_15, var_3254_cast_fp16))[name = tensor<string, []>("op_3290_cast_fp16")];
+            tensor<string, []> var_3292_equation_0 = const()[name = tensor<string, []>("op_3292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3292_cast_fp16 = einsum(equation = var_3292_equation_0, values = (var_3178_cast_fp16_16, var_3255_cast_fp16))[name = tensor<string, []>("op_3292_cast_fp16")];
+            tensor<string, []> var_3294_equation_0 = const()[name = tensor<string, []>("op_3294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3294_cast_fp16 = einsum(equation = var_3294_equation_0, values = (var_3178_cast_fp16_17, var_3256_cast_fp16))[name = tensor<string, []>("op_3294_cast_fp16")];
+            tensor<string, []> var_3296_equation_0 = const()[name = tensor<string, []>("op_3296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3296_cast_fp16 = einsum(equation = var_3296_equation_0, values = (var_3178_cast_fp16_18, var_3257_cast_fp16))[name = tensor<string, []>("op_3296_cast_fp16")];
+            tensor<string, []> var_3298_equation_0 = const()[name = tensor<string, []>("op_3298_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3298_cast_fp16 = einsum(equation = var_3298_equation_0, values = (var_3178_cast_fp16_19, var_3258_cast_fp16))[name = tensor<string, []>("op_3298_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_115_cast_fp16 = concat(axis = var_3083, interleave = input_115_interleave_0, values = (var_3260_cast_fp16, var_3262_cast_fp16, var_3264_cast_fp16, var_3266_cast_fp16, var_3268_cast_fp16, var_3270_cast_fp16, var_3272_cast_fp16, var_3274_cast_fp16, var_3276_cast_fp16, var_3278_cast_fp16, var_3280_cast_fp16, var_3282_cast_fp16, var_3284_cast_fp16, var_3286_cast_fp16, var_3288_cast_fp16, var_3290_cast_fp16, var_3292_cast_fp16, var_3294_cast_fp16, var_3296_cast_fp16, var_3298_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_3307_pad_type_0 = const()[name = tensor<string, []>("op_3307_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3307_strides_0 = const()[name = tensor<string, []>("op_3307_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3307_pad_0 = const()[name = tensor<string, []>("op_3307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3307_dilations_0 = const()[name = tensor<string, []>("op_3307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3307_groups_0 = const()[name = tensor<string, []>("op_3307_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457017472)))];
+            tensor<fp16, [1280]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460294336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3307_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_3307_dilations_0, groups = var_3307_groups_0, pad = var_3307_pad_0, pad_type = var_3307_pad_type_0, strides = var_3307_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_3307_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_3307_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460296960)))];
+            tensor<fp16, [1280]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460299584)))];
+            tensor<fp16, []> var_3317_to_fp16 = const()[name = tensor<string, []>("op_3317_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_3317_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460302208)))];
+            tensor<fp16, [5120]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473409472)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<string, []> var_3343_pad_type_0 = const()[name = tensor<string, []>("op_3343_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3343_strides_0 = const()[name = tensor<string, []>("op_3343_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3343_pad_0 = const()[name = tensor<string, []>("op_3343_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3343_dilations_0 = const()[name = tensor<string, []>("op_3343_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3343_groups_0 = const()[name = tensor<string, []>("op_3343_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473419776)))];
+            tensor<fp16, [1280]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486527040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3343_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_3343_dilations_0, groups = var_3343_groups_0, pad = var_3343_pad_0, pad_type = var_3343_pad_type_0, strides = var_3343_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("op_3343_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_3343_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_3352 = const()[name = tensor<string, []>("op_3352"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_123_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486529664)))];
+            tensor<fp16, [1280]> input_123_beta_0_to_fp16 = const()[name = tensor<string, []>("input_123_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486532288)))];
+            tensor<fp16, []> var_3368_to_fp16 = const()[name = tensor<string, []>("op_3368_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = input_123_beta_0_to_fp16, epsilon = var_3368_to_fp16, gamma = input_123_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
+            tensor<string, []> q_25_pad_type_0 = const()[name = tensor<string, []>("q_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_25_strides_0 = const()[name = tensor<string, []>("q_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_25_pad_0 = const()[name = tensor<string, []>("q_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_25_dilations_0 = const()[name = tensor<string, []>("q_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_25_groups_0 = const()[name = tensor<string, []>("q_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3403_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3403_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486534912)))];
+            tensor<fp16, [1280]> var_3403_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3403_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(489811776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3403_cast_fp16 = conv(bias = var_3403_bias_0_to_fp16, dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = var_3403_weight_0_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3403_cast_fp16")];
+            tensor<string, []> k_25_pad_type_0 = const()[name = tensor<string, []>("k_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_25_strides_0 = const()[name = tensor<string, []>("k_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_25_pad_0 = const()[name = tensor<string, []>("k_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_25_dilations_0 = const()[name = tensor<string, []>("k_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_25_groups_0 = const()[name = tensor<string, []>("k_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(489814400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_25_cast_fp16 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = blocks_12_attn_key_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
+            tensor<string, []> var_3401_pad_type_0 = const()[name = tensor<string, []>("op_3401_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3401_strides_0 = const()[name = tensor<string, []>("op_3401_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3401_pad_0 = const()[name = tensor<string, []>("op_3401_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3401_dilations_0 = const()[name = tensor<string, []>("op_3401_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3401_groups_0 = const()[name = tensor<string, []>("op_3401_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(493091264)))];
+            tensor<fp16, [1280]> blocks_12_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496368128)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3401_cast_fp16 = conv(bias = blocks_12_attn_value_bias_to_fp16, dilations = var_3401_dilations_0, groups = var_3401_groups_0, pad = var_3401_pad_0, pad_type = var_3401_pad_type_0, strides = var_3401_strides_0, weight = blocks_12_attn_value_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3401_cast_fp16")];
+            tensor<int32, [20]> tile_36 = const()[name = tensor<string, []>("tile_36"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3404_axis_0 = const()[name = tensor<string, []>("op_3404_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_19 = split(axis = var_3404_axis_0, split_sizes = tile_36, x = var_3403_cast_fp16)[name = tensor<string, []>("op_3404_cast_fp16")];
+            tensor<int32, [4]> var_3425_perm_0 = const()[name = tensor<string, []>("op_3425_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3426_axis_0 = const()[name = tensor<string, []>("op_3426_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3425_cast_fp16 = transpose(perm = var_3425_perm_0, x = k_25_cast_fp16)[name = tensor<string, []>("transpose_20")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_19 = split(axis = var_3426_axis_0, split_sizes = tile_37, x = var_3425_cast_fp16)[name = tensor<string, []>("op_3426_cast_fp16")];
+            tensor<int32, [20]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3447_axis_0 = const()[name = tensor<string, []>("op_3447_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_19 = split(axis = var_3447_axis_0, split_sizes = tile_38, x = var_3401_cast_fp16)[name = tensor<string, []>("op_3447_cast_fp16")];
+            tensor<string, []> aw_481_equation_0 = const()[name = tensor<string, []>("aw_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_481_cast_fp16 = einsum(equation = aw_481_equation_0, values = (var_3426_cast_fp16_0, var_3404_cast_fp16_0))[name = tensor<string, []>("aw_481_cast_fp16")];
+            tensor<string, []> aw_483_equation_0 = const()[name = tensor<string, []>("aw_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_483_cast_fp16 = einsum(equation = aw_483_equation_0, values = (var_3426_cast_fp16_1, var_3404_cast_fp16_1))[name = tensor<string, []>("aw_483_cast_fp16")];
+            tensor<string, []> aw_485_equation_0 = const()[name = tensor<string, []>("aw_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_485_cast_fp16 = einsum(equation = aw_485_equation_0, values = (var_3426_cast_fp16_2, var_3404_cast_fp16_2))[name = tensor<string, []>("aw_485_cast_fp16")];
+            tensor<string, []> aw_487_equation_0 = const()[name = tensor<string, []>("aw_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_487_cast_fp16 = einsum(equation = aw_487_equation_0, values = (var_3426_cast_fp16_3, var_3404_cast_fp16_3))[name = tensor<string, []>("aw_487_cast_fp16")];
+            tensor<string, []> aw_489_equation_0 = const()[name = tensor<string, []>("aw_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_489_cast_fp16 = einsum(equation = aw_489_equation_0, values = (var_3426_cast_fp16_4, var_3404_cast_fp16_4))[name = tensor<string, []>("aw_489_cast_fp16")];
+            tensor<string, []> aw_491_equation_0 = const()[name = tensor<string, []>("aw_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_491_cast_fp16 = einsum(equation = aw_491_equation_0, values = (var_3426_cast_fp16_5, var_3404_cast_fp16_5))[name = tensor<string, []>("aw_491_cast_fp16")];
+            tensor<string, []> aw_493_equation_0 = const()[name = tensor<string, []>("aw_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_493_cast_fp16 = einsum(equation = aw_493_equation_0, values = (var_3426_cast_fp16_6, var_3404_cast_fp16_6))[name = tensor<string, []>("aw_493_cast_fp16")];
+            tensor<string, []> aw_495_equation_0 = const()[name = tensor<string, []>("aw_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_495_cast_fp16 = einsum(equation = aw_495_equation_0, values = (var_3426_cast_fp16_7, var_3404_cast_fp16_7))[name = tensor<string, []>("aw_495_cast_fp16")];
+            tensor<string, []> aw_497_equation_0 = const()[name = tensor<string, []>("aw_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_497_cast_fp16 = einsum(equation = aw_497_equation_0, values = (var_3426_cast_fp16_8, var_3404_cast_fp16_8))[name = tensor<string, []>("aw_497_cast_fp16")];
+            tensor<string, []> aw_499_equation_0 = const()[name = tensor<string, []>("aw_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_499_cast_fp16 = einsum(equation = aw_499_equation_0, values = (var_3426_cast_fp16_9, var_3404_cast_fp16_9))[name = tensor<string, []>("aw_499_cast_fp16")];
+            tensor<string, []> aw_501_equation_0 = const()[name = tensor<string, []>("aw_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_501_cast_fp16 = einsum(equation = aw_501_equation_0, values = (var_3426_cast_fp16_10, var_3404_cast_fp16_10))[name = tensor<string, []>("aw_501_cast_fp16")];
+            tensor<string, []> aw_503_equation_0 = const()[name = tensor<string, []>("aw_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_503_cast_fp16 = einsum(equation = aw_503_equation_0, values = (var_3426_cast_fp16_11, var_3404_cast_fp16_11))[name = tensor<string, []>("aw_503_cast_fp16")];
+            tensor<string, []> aw_505_equation_0 = const()[name = tensor<string, []>("aw_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_505_cast_fp16 = einsum(equation = aw_505_equation_0, values = (var_3426_cast_fp16_12, var_3404_cast_fp16_12))[name = tensor<string, []>("aw_505_cast_fp16")];
+            tensor<string, []> aw_507_equation_0 = const()[name = tensor<string, []>("aw_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_507_cast_fp16 = einsum(equation = aw_507_equation_0, values = (var_3426_cast_fp16_13, var_3404_cast_fp16_13))[name = tensor<string, []>("aw_507_cast_fp16")];
+            tensor<string, []> aw_509_equation_0 = const()[name = tensor<string, []>("aw_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_509_cast_fp16 = einsum(equation = aw_509_equation_0, values = (var_3426_cast_fp16_14, var_3404_cast_fp16_14))[name = tensor<string, []>("aw_509_cast_fp16")];
+            tensor<string, []> aw_511_equation_0 = const()[name = tensor<string, []>("aw_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_511_cast_fp16 = einsum(equation = aw_511_equation_0, values = (var_3426_cast_fp16_15, var_3404_cast_fp16_15))[name = tensor<string, []>("aw_511_cast_fp16")];
+            tensor<string, []> aw_513_equation_0 = const()[name = tensor<string, []>("aw_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_513_cast_fp16 = einsum(equation = aw_513_equation_0, values = (var_3426_cast_fp16_16, var_3404_cast_fp16_16))[name = tensor<string, []>("aw_513_cast_fp16")];
+            tensor<string, []> aw_515_equation_0 = const()[name = tensor<string, []>("aw_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_515_cast_fp16 = einsum(equation = aw_515_equation_0, values = (var_3426_cast_fp16_17, var_3404_cast_fp16_17))[name = tensor<string, []>("aw_515_cast_fp16")];
+            tensor<string, []> aw_517_equation_0 = const()[name = tensor<string, []>("aw_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_517_cast_fp16 = einsum(equation = aw_517_equation_0, values = (var_3426_cast_fp16_18, var_3404_cast_fp16_18))[name = tensor<string, []>("aw_517_cast_fp16")];
+            tensor<string, []> aw_519_equation_0 = const()[name = tensor<string, []>("aw_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_519_cast_fp16 = einsum(equation = aw_519_equation_0, values = (var_3426_cast_fp16_19, var_3404_cast_fp16_19))[name = tensor<string, []>("aw_519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3508_cast_fp16 = softmax(axis = var_3352, x = aw_481_cast_fp16)[name = tensor<string, []>("op_3508_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3509_cast_fp16 = softmax(axis = var_3352, x = aw_483_cast_fp16)[name = tensor<string, []>("op_3509_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3510_cast_fp16 = softmax(axis = var_3352, x = aw_485_cast_fp16)[name = tensor<string, []>("op_3510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3511_cast_fp16 = softmax(axis = var_3352, x = aw_487_cast_fp16)[name = tensor<string, []>("op_3511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3512_cast_fp16 = softmax(axis = var_3352, x = aw_489_cast_fp16)[name = tensor<string, []>("op_3512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3513_cast_fp16 = softmax(axis = var_3352, x = aw_491_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3514_cast_fp16 = softmax(axis = var_3352, x = aw_493_cast_fp16)[name = tensor<string, []>("op_3514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3515_cast_fp16 = softmax(axis = var_3352, x = aw_495_cast_fp16)[name = tensor<string, []>("op_3515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3516_cast_fp16 = softmax(axis = var_3352, x = aw_497_cast_fp16)[name = tensor<string, []>("op_3516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3517_cast_fp16 = softmax(axis = var_3352, x = aw_499_cast_fp16)[name = tensor<string, []>("op_3517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3518_cast_fp16 = softmax(axis = var_3352, x = aw_501_cast_fp16)[name = tensor<string, []>("op_3518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3519_cast_fp16 = softmax(axis = var_3352, x = aw_503_cast_fp16)[name = tensor<string, []>("op_3519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3520_cast_fp16 = softmax(axis = var_3352, x = aw_505_cast_fp16)[name = tensor<string, []>("op_3520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3521_cast_fp16 = softmax(axis = var_3352, x = aw_507_cast_fp16)[name = tensor<string, []>("op_3521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3522_cast_fp16 = softmax(axis = var_3352, x = aw_509_cast_fp16)[name = tensor<string, []>("op_3522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3523_cast_fp16 = softmax(axis = var_3352, x = aw_511_cast_fp16)[name = tensor<string, []>("op_3523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3524_cast_fp16 = softmax(axis = var_3352, x = aw_513_cast_fp16)[name = tensor<string, []>("op_3524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3525_cast_fp16 = softmax(axis = var_3352, x = aw_515_cast_fp16)[name = tensor<string, []>("op_3525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3526_cast_fp16 = softmax(axis = var_3352, x = aw_517_cast_fp16)[name = tensor<string, []>("op_3526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3527_cast_fp16 = softmax(axis = var_3352, x = aw_519_cast_fp16)[name = tensor<string, []>("op_3527_cast_fp16")];
+            tensor<string, []> var_3529_equation_0 = const()[name = tensor<string, []>("op_3529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3529_cast_fp16 = einsum(equation = var_3529_equation_0, values = (var_3447_cast_fp16_0, var_3508_cast_fp16))[name = tensor<string, []>("op_3529_cast_fp16")];
+            tensor<string, []> var_3531_equation_0 = const()[name = tensor<string, []>("op_3531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3531_cast_fp16 = einsum(equation = var_3531_equation_0, values = (var_3447_cast_fp16_1, var_3509_cast_fp16))[name = tensor<string, []>("op_3531_cast_fp16")];
+            tensor<string, []> var_3533_equation_0 = const()[name = tensor<string, []>("op_3533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3533_cast_fp16 = einsum(equation = var_3533_equation_0, values = (var_3447_cast_fp16_2, var_3510_cast_fp16))[name = tensor<string, []>("op_3533_cast_fp16")];
+            tensor<string, []> var_3535_equation_0 = const()[name = tensor<string, []>("op_3535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3535_cast_fp16 = einsum(equation = var_3535_equation_0, values = (var_3447_cast_fp16_3, var_3511_cast_fp16))[name = tensor<string, []>("op_3535_cast_fp16")];
+            tensor<string, []> var_3537_equation_0 = const()[name = tensor<string, []>("op_3537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3537_cast_fp16 = einsum(equation = var_3537_equation_0, values = (var_3447_cast_fp16_4, var_3512_cast_fp16))[name = tensor<string, []>("op_3537_cast_fp16")];
+            tensor<string, []> var_3539_equation_0 = const()[name = tensor<string, []>("op_3539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3539_cast_fp16 = einsum(equation = var_3539_equation_0, values = (var_3447_cast_fp16_5, var_3513_cast_fp16))[name = tensor<string, []>("op_3539_cast_fp16")];
+            tensor<string, []> var_3541_equation_0 = const()[name = tensor<string, []>("op_3541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3541_cast_fp16 = einsum(equation = var_3541_equation_0, values = (var_3447_cast_fp16_6, var_3514_cast_fp16))[name = tensor<string, []>("op_3541_cast_fp16")];
+            tensor<string, []> var_3543_equation_0 = const()[name = tensor<string, []>("op_3543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3543_cast_fp16 = einsum(equation = var_3543_equation_0, values = (var_3447_cast_fp16_7, var_3515_cast_fp16))[name = tensor<string, []>("op_3543_cast_fp16")];
+            tensor<string, []> var_3545_equation_0 = const()[name = tensor<string, []>("op_3545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3545_cast_fp16 = einsum(equation = var_3545_equation_0, values = (var_3447_cast_fp16_8, var_3516_cast_fp16))[name = tensor<string, []>("op_3545_cast_fp16")];
+            tensor<string, []> var_3547_equation_0 = const()[name = tensor<string, []>("op_3547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3547_cast_fp16 = einsum(equation = var_3547_equation_0, values = (var_3447_cast_fp16_9, var_3517_cast_fp16))[name = tensor<string, []>("op_3547_cast_fp16")];
+            tensor<string, []> var_3549_equation_0 = const()[name = tensor<string, []>("op_3549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3549_cast_fp16 = einsum(equation = var_3549_equation_0, values = (var_3447_cast_fp16_10, var_3518_cast_fp16))[name = tensor<string, []>("op_3549_cast_fp16")];
+            tensor<string, []> var_3551_equation_0 = const()[name = tensor<string, []>("op_3551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3551_cast_fp16 = einsum(equation = var_3551_equation_0, values = (var_3447_cast_fp16_11, var_3519_cast_fp16))[name = tensor<string, []>("op_3551_cast_fp16")];
+            tensor<string, []> var_3553_equation_0 = const()[name = tensor<string, []>("op_3553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3553_cast_fp16 = einsum(equation = var_3553_equation_0, values = (var_3447_cast_fp16_12, var_3520_cast_fp16))[name = tensor<string, []>("op_3553_cast_fp16")];
+            tensor<string, []> var_3555_equation_0 = const()[name = tensor<string, []>("op_3555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3555_cast_fp16 = einsum(equation = var_3555_equation_0, values = (var_3447_cast_fp16_13, var_3521_cast_fp16))[name = tensor<string, []>("op_3555_cast_fp16")];
+            tensor<string, []> var_3557_equation_0 = const()[name = tensor<string, []>("op_3557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3557_cast_fp16 = einsum(equation = var_3557_equation_0, values = (var_3447_cast_fp16_14, var_3522_cast_fp16))[name = tensor<string, []>("op_3557_cast_fp16")];
+            tensor<string, []> var_3559_equation_0 = const()[name = tensor<string, []>("op_3559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3559_cast_fp16 = einsum(equation = var_3559_equation_0, values = (var_3447_cast_fp16_15, var_3523_cast_fp16))[name = tensor<string, []>("op_3559_cast_fp16")];
+            tensor<string, []> var_3561_equation_0 = const()[name = tensor<string, []>("op_3561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3561_cast_fp16 = einsum(equation = var_3561_equation_0, values = (var_3447_cast_fp16_16, var_3524_cast_fp16))[name = tensor<string, []>("op_3561_cast_fp16")];
+            tensor<string, []> var_3563_equation_0 = const()[name = tensor<string, []>("op_3563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3563_cast_fp16 = einsum(equation = var_3563_equation_0, values = (var_3447_cast_fp16_17, var_3525_cast_fp16))[name = tensor<string, []>("op_3563_cast_fp16")];
+            tensor<string, []> var_3565_equation_0 = const()[name = tensor<string, []>("op_3565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3565_cast_fp16 = einsum(equation = var_3565_equation_0, values = (var_3447_cast_fp16_18, var_3526_cast_fp16))[name = tensor<string, []>("op_3565_cast_fp16")];
+            tensor<string, []> var_3567_equation_0 = const()[name = tensor<string, []>("op_3567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3567_cast_fp16 = einsum(equation = var_3567_equation_0, values = (var_3447_cast_fp16_19, var_3527_cast_fp16))[name = tensor<string, []>("op_3567_cast_fp16")];
+            tensor<bool, []> input_125_interleave_0 = const()[name = tensor<string, []>("input_125_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_125_cast_fp16 = concat(axis = var_3352, interleave = input_125_interleave_0, values = (var_3529_cast_fp16, var_3531_cast_fp16, var_3533_cast_fp16, var_3535_cast_fp16, var_3537_cast_fp16, var_3539_cast_fp16, var_3541_cast_fp16, var_3543_cast_fp16, var_3545_cast_fp16, var_3547_cast_fp16, var_3549_cast_fp16, var_3551_cast_fp16, var_3553_cast_fp16, var_3555_cast_fp16, var_3557_cast_fp16, var_3559_cast_fp16, var_3561_cast_fp16, var_3563_cast_fp16, var_3565_cast_fp16, var_3567_cast_fp16))[name = tensor<string, []>("input_125_cast_fp16")];
+            tensor<string, []> var_3576_pad_type_0 = const()[name = tensor<string, []>("op_3576_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3576_strides_0 = const()[name = tensor<string, []>("op_3576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3576_pad_0 = const()[name = tensor<string, []>("op_3576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3576_dilations_0 = const()[name = tensor<string, []>("op_3576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3576_groups_0 = const()[name = tensor<string, []>("op_3576_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496370752)))];
+            tensor<fp16, [1280]> blocks_12_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499647616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3576_cast_fp16 = conv(bias = blocks_12_attn_out_bias_to_fp16, dilations = var_3576_dilations_0, groups = var_3576_groups_0, pad = var_3576_pad_0, pad_type = var_3576_pad_type_0, strides = var_3576_strides_0, weight = blocks_12_attn_out_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = var_3576_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_127_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499650240)))];
+            tensor<fp16, [1280]> input_127_beta_0_to_fp16 = const()[name = tensor<string, []>("input_127_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499652864)))];
+            tensor<fp16, []> var_3586_to_fp16 = const()[name = tensor<string, []>("op_3586_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = input_127_beta_0_to_fp16, epsilon = var_3586_to_fp16, gamma = input_127_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
+            tensor<string, []> input_129_pad_type_0 = const()[name = tensor<string, []>("input_129_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_129_strides_0 = const()[name = tensor<string, []>("input_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_129_pad_0 = const()[name = tensor<string, []>("input_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_129_dilations_0 = const()[name = tensor<string, []>("input_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_129_groups_0 = const()[name = tensor<string, []>("input_129_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_12_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(499655488)))];
+            tensor<fp16, [5120]> blocks_12_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(512762752)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_129_cast_fp16 = conv(bias = blocks_12_mlp_0_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = blocks_12_mlp_0_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
+            tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<string, []> var_3612_pad_type_0 = const()[name = tensor<string, []>("op_3612_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3612_strides_0 = const()[name = tensor<string, []>("op_3612_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3612_pad_0 = const()[name = tensor<string, []>("op_3612_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3612_dilations_0 = const()[name = tensor<string, []>("op_3612_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3612_groups_0 = const()[name = tensor<string, []>("op_3612_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_12_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(512773056)))];
+            tensor<fp16, [1280]> blocks_12_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525880320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3612_cast_fp16 = conv(bias = blocks_12_mlp_2_bias_to_fp16, dilations = var_3612_dilations_0, groups = var_3612_groups_0, pad = var_3612_pad_0, pad_type = var_3612_pad_type_0, strides = var_3612_strides_0, weight = blocks_12_mlp_2_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("op_3612_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = var_3612_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, []> var_3621 = const()[name = tensor<string, []>("op_3621"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = tensor<string, []>("input_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_133_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_133_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525882944)))];
+            tensor<fp16, [1280]> input_133_beta_0_to_fp16 = const()[name = tensor<string, []>("input_133_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525885568)))];
+            tensor<fp16, []> var_3637_to_fp16 = const()[name = tensor<string, []>("op_3637_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = input_133_beta_0_to_fp16, epsilon = var_3637_to_fp16, gamma = input_133_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<string, []> q_27_pad_type_0 = const()[name = tensor<string, []>("q_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_27_strides_0 = const()[name = tensor<string, []>("q_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_27_pad_0 = const()[name = tensor<string, []>("q_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_27_dilations_0 = const()[name = tensor<string, []>("q_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_27_groups_0 = const()[name = tensor<string, []>("q_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3672_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3672_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(525888192)))];
+            tensor<fp16, [1280]> var_3672_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3672_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529165056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3672_cast_fp16 = conv(bias = var_3672_bias_0_to_fp16, dilations = q_27_dilations_0, groups = q_27_groups_0, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = q_27_strides_0, weight = var_3672_weight_0_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3672_cast_fp16")];
+            tensor<string, []> k_27_pad_type_0 = const()[name = tensor<string, []>("k_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_27_strides_0 = const()[name = tensor<string, []>("k_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_27_pad_0 = const()[name = tensor<string, []>("k_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_27_dilations_0 = const()[name = tensor<string, []>("k_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_27_groups_0 = const()[name = tensor<string, []>("k_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529167680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_27_cast_fp16 = conv(dilations = k_27_dilations_0, groups = k_27_groups_0, pad = k_27_pad_0, pad_type = k_27_pad_type_0, strides = k_27_strides_0, weight = blocks_13_attn_key_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("k_27_cast_fp16")];
+            tensor<string, []> var_3670_pad_type_0 = const()[name = tensor<string, []>("op_3670_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3670_strides_0 = const()[name = tensor<string, []>("op_3670_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3670_pad_0 = const()[name = tensor<string, []>("op_3670_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3670_dilations_0 = const()[name = tensor<string, []>("op_3670_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3670_groups_0 = const()[name = tensor<string, []>("op_3670_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(532444544)))];
+            tensor<fp16, [1280]> blocks_13_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(535721408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3670_cast_fp16 = conv(bias = blocks_13_attn_value_bias_to_fp16, dilations = var_3670_dilations_0, groups = var_3670_groups_0, pad = var_3670_pad_0, pad_type = var_3670_pad_type_0, strides = var_3670_strides_0, weight = blocks_13_attn_value_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3670_cast_fp16")];
+            tensor<int32, [20]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3673_axis_0 = const()[name = tensor<string, []>("op_3673_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_19 = split(axis = var_3673_axis_0, split_sizes = tile_39, x = var_3672_cast_fp16)[name = tensor<string, []>("op_3673_cast_fp16")];
+            tensor<int32, [4]> var_3694_perm_0 = const()[name = tensor<string, []>("op_3694_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_40 = const()[name = tensor<string, []>("tile_40"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3695_axis_0 = const()[name = tensor<string, []>("op_3695_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_19")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_19 = split(axis = var_3695_axis_0, split_sizes = tile_40, x = var_3694_cast_fp16)[name = tensor<string, []>("op_3695_cast_fp16")];
+            tensor<int32, [20]> tile_41 = const()[name = tensor<string, []>("tile_41"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3716_axis_0 = const()[name = tensor<string, []>("op_3716_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_19 = split(axis = var_3716_axis_0, split_sizes = tile_41, x = var_3670_cast_fp16)[name = tensor<string, []>("op_3716_cast_fp16")];
+            tensor<string, []> aw_521_equation_0 = const()[name = tensor<string, []>("aw_521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_521_cast_fp16 = einsum(equation = aw_521_equation_0, values = (var_3695_cast_fp16_0, var_3673_cast_fp16_0))[name = tensor<string, []>("aw_521_cast_fp16")];
+            tensor<string, []> aw_523_equation_0 = const()[name = tensor<string, []>("aw_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_523_cast_fp16 = einsum(equation = aw_523_equation_0, values = (var_3695_cast_fp16_1, var_3673_cast_fp16_1))[name = tensor<string, []>("aw_523_cast_fp16")];
+            tensor<string, []> aw_525_equation_0 = const()[name = tensor<string, []>("aw_525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_525_cast_fp16 = einsum(equation = aw_525_equation_0, values = (var_3695_cast_fp16_2, var_3673_cast_fp16_2))[name = tensor<string, []>("aw_525_cast_fp16")];
+            tensor<string, []> aw_527_equation_0 = const()[name = tensor<string, []>("aw_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_527_cast_fp16 = einsum(equation = aw_527_equation_0, values = (var_3695_cast_fp16_3, var_3673_cast_fp16_3))[name = tensor<string, []>("aw_527_cast_fp16")];
+            tensor<string, []> aw_529_equation_0 = const()[name = tensor<string, []>("aw_529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_529_cast_fp16 = einsum(equation = aw_529_equation_0, values = (var_3695_cast_fp16_4, var_3673_cast_fp16_4))[name = tensor<string, []>("aw_529_cast_fp16")];
+            tensor<string, []> aw_531_equation_0 = const()[name = tensor<string, []>("aw_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_531_cast_fp16 = einsum(equation = aw_531_equation_0, values = (var_3695_cast_fp16_5, var_3673_cast_fp16_5))[name = tensor<string, []>("aw_531_cast_fp16")];
+            tensor<string, []> aw_533_equation_0 = const()[name = tensor<string, []>("aw_533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_533_cast_fp16 = einsum(equation = aw_533_equation_0, values = (var_3695_cast_fp16_6, var_3673_cast_fp16_6))[name = tensor<string, []>("aw_533_cast_fp16")];
+            tensor<string, []> aw_535_equation_0 = const()[name = tensor<string, []>("aw_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_535_cast_fp16 = einsum(equation = aw_535_equation_0, values = (var_3695_cast_fp16_7, var_3673_cast_fp16_7))[name = tensor<string, []>("aw_535_cast_fp16")];
+            tensor<string, []> aw_537_equation_0 = const()[name = tensor<string, []>("aw_537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_537_cast_fp16 = einsum(equation = aw_537_equation_0, values = (var_3695_cast_fp16_8, var_3673_cast_fp16_8))[name = tensor<string, []>("aw_537_cast_fp16")];
+            tensor<string, []> aw_539_equation_0 = const()[name = tensor<string, []>("aw_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_539_cast_fp16 = einsum(equation = aw_539_equation_0, values = (var_3695_cast_fp16_9, var_3673_cast_fp16_9))[name = tensor<string, []>("aw_539_cast_fp16")];
+            tensor<string, []> aw_541_equation_0 = const()[name = tensor<string, []>("aw_541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_541_cast_fp16 = einsum(equation = aw_541_equation_0, values = (var_3695_cast_fp16_10, var_3673_cast_fp16_10))[name = tensor<string, []>("aw_541_cast_fp16")];
+            tensor<string, []> aw_543_equation_0 = const()[name = tensor<string, []>("aw_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_543_cast_fp16 = einsum(equation = aw_543_equation_0, values = (var_3695_cast_fp16_11, var_3673_cast_fp16_11))[name = tensor<string, []>("aw_543_cast_fp16")];
+            tensor<string, []> aw_545_equation_0 = const()[name = tensor<string, []>("aw_545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_545_cast_fp16 = einsum(equation = aw_545_equation_0, values = (var_3695_cast_fp16_12, var_3673_cast_fp16_12))[name = tensor<string, []>("aw_545_cast_fp16")];
+            tensor<string, []> aw_547_equation_0 = const()[name = tensor<string, []>("aw_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_547_cast_fp16 = einsum(equation = aw_547_equation_0, values = (var_3695_cast_fp16_13, var_3673_cast_fp16_13))[name = tensor<string, []>("aw_547_cast_fp16")];
+            tensor<string, []> aw_549_equation_0 = const()[name = tensor<string, []>("aw_549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_549_cast_fp16 = einsum(equation = aw_549_equation_0, values = (var_3695_cast_fp16_14, var_3673_cast_fp16_14))[name = tensor<string, []>("aw_549_cast_fp16")];
+            tensor<string, []> aw_551_equation_0 = const()[name = tensor<string, []>("aw_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_551_cast_fp16 = einsum(equation = aw_551_equation_0, values = (var_3695_cast_fp16_15, var_3673_cast_fp16_15))[name = tensor<string, []>("aw_551_cast_fp16")];
+            tensor<string, []> aw_553_equation_0 = const()[name = tensor<string, []>("aw_553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_553_cast_fp16 = einsum(equation = aw_553_equation_0, values = (var_3695_cast_fp16_16, var_3673_cast_fp16_16))[name = tensor<string, []>("aw_553_cast_fp16")];
+            tensor<string, []> aw_555_equation_0 = const()[name = tensor<string, []>("aw_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_555_cast_fp16 = einsum(equation = aw_555_equation_0, values = (var_3695_cast_fp16_17, var_3673_cast_fp16_17))[name = tensor<string, []>("aw_555_cast_fp16")];
+            tensor<string, []> aw_557_equation_0 = const()[name = tensor<string, []>("aw_557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_557_cast_fp16 = einsum(equation = aw_557_equation_0, values = (var_3695_cast_fp16_18, var_3673_cast_fp16_18))[name = tensor<string, []>("aw_557_cast_fp16")];
+            tensor<string, []> aw_559_equation_0 = const()[name = tensor<string, []>("aw_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_559_cast_fp16 = einsum(equation = aw_559_equation_0, values = (var_3695_cast_fp16_19, var_3673_cast_fp16_19))[name = tensor<string, []>("aw_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3777_cast_fp16 = softmax(axis = var_3621, x = aw_521_cast_fp16)[name = tensor<string, []>("op_3777_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3778_cast_fp16 = softmax(axis = var_3621, x = aw_523_cast_fp16)[name = tensor<string, []>("op_3778_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3779_cast_fp16 = softmax(axis = var_3621, x = aw_525_cast_fp16)[name = tensor<string, []>("op_3779_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3780_cast_fp16 = softmax(axis = var_3621, x = aw_527_cast_fp16)[name = tensor<string, []>("op_3780_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3781_cast_fp16 = softmax(axis = var_3621, x = aw_529_cast_fp16)[name = tensor<string, []>("op_3781_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3782_cast_fp16 = softmax(axis = var_3621, x = aw_531_cast_fp16)[name = tensor<string, []>("op_3782_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3783_cast_fp16 = softmax(axis = var_3621, x = aw_533_cast_fp16)[name = tensor<string, []>("op_3783_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3784_cast_fp16 = softmax(axis = var_3621, x = aw_535_cast_fp16)[name = tensor<string, []>("op_3784_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3785_cast_fp16 = softmax(axis = var_3621, x = aw_537_cast_fp16)[name = tensor<string, []>("op_3785_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3786_cast_fp16 = softmax(axis = var_3621, x = aw_539_cast_fp16)[name = tensor<string, []>("op_3786_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3787_cast_fp16 = softmax(axis = var_3621, x = aw_541_cast_fp16)[name = tensor<string, []>("op_3787_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3788_cast_fp16 = softmax(axis = var_3621, x = aw_543_cast_fp16)[name = tensor<string, []>("op_3788_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3789_cast_fp16 = softmax(axis = var_3621, x = aw_545_cast_fp16)[name = tensor<string, []>("op_3789_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3790_cast_fp16 = softmax(axis = var_3621, x = aw_547_cast_fp16)[name = tensor<string, []>("op_3790_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3791_cast_fp16 = softmax(axis = var_3621, x = aw_549_cast_fp16)[name = tensor<string, []>("op_3791_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3792_cast_fp16 = softmax(axis = var_3621, x = aw_551_cast_fp16)[name = tensor<string, []>("op_3792_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3793_cast_fp16 = softmax(axis = var_3621, x = aw_553_cast_fp16)[name = tensor<string, []>("op_3793_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3794_cast_fp16 = softmax(axis = var_3621, x = aw_555_cast_fp16)[name = tensor<string, []>("op_3794_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3795_cast_fp16 = softmax(axis = var_3621, x = aw_557_cast_fp16)[name = tensor<string, []>("op_3795_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3796_cast_fp16 = softmax(axis = var_3621, x = aw_559_cast_fp16)[name = tensor<string, []>("op_3796_cast_fp16")];
+            tensor<string, []> var_3798_equation_0 = const()[name = tensor<string, []>("op_3798_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3798_cast_fp16 = einsum(equation = var_3798_equation_0, values = (var_3716_cast_fp16_0, var_3777_cast_fp16))[name = tensor<string, []>("op_3798_cast_fp16")];
+            tensor<string, []> var_3800_equation_0 = const()[name = tensor<string, []>("op_3800_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3800_cast_fp16 = einsum(equation = var_3800_equation_0, values = (var_3716_cast_fp16_1, var_3778_cast_fp16))[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<string, []> var_3802_equation_0 = const()[name = tensor<string, []>("op_3802_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3802_cast_fp16 = einsum(equation = var_3802_equation_0, values = (var_3716_cast_fp16_2, var_3779_cast_fp16))[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<string, []> var_3804_equation_0 = const()[name = tensor<string, []>("op_3804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3804_cast_fp16 = einsum(equation = var_3804_equation_0, values = (var_3716_cast_fp16_3, var_3780_cast_fp16))[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<string, []> var_3806_equation_0 = const()[name = tensor<string, []>("op_3806_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3806_cast_fp16 = einsum(equation = var_3806_equation_0, values = (var_3716_cast_fp16_4, var_3781_cast_fp16))[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<string, []> var_3808_equation_0 = const()[name = tensor<string, []>("op_3808_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3808_cast_fp16 = einsum(equation = var_3808_equation_0, values = (var_3716_cast_fp16_5, var_3782_cast_fp16))[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<string, []> var_3810_equation_0 = const()[name = tensor<string, []>("op_3810_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3810_cast_fp16 = einsum(equation = var_3810_equation_0, values = (var_3716_cast_fp16_6, var_3783_cast_fp16))[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<string, []> var_3812_equation_0 = const()[name = tensor<string, []>("op_3812_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3812_cast_fp16 = einsum(equation = var_3812_equation_0, values = (var_3716_cast_fp16_7, var_3784_cast_fp16))[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<string, []> var_3814_equation_0 = const()[name = tensor<string, []>("op_3814_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3814_cast_fp16 = einsum(equation = var_3814_equation_0, values = (var_3716_cast_fp16_8, var_3785_cast_fp16))[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3716_cast_fp16_9, var_3786_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3716_cast_fp16_10, var_3787_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3716_cast_fp16_11, var_3788_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3716_cast_fp16_12, var_3789_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3716_cast_fp16_13, var_3790_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3716_cast_fp16_14, var_3791_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3716_cast_fp16_15, var_3792_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3716_cast_fp16_16, var_3793_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3716_cast_fp16_17, var_3794_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3716_cast_fp16_18, var_3795_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3716_cast_fp16_19, var_3796_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<bool, []> input_135_interleave_0 = const()[name = tensor<string, []>("input_135_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_135_cast_fp16 = concat(axis = var_3621, interleave = input_135_interleave_0, values = (var_3798_cast_fp16, var_3800_cast_fp16, var_3802_cast_fp16, var_3804_cast_fp16, var_3806_cast_fp16, var_3808_cast_fp16, var_3810_cast_fp16, var_3812_cast_fp16, var_3814_cast_fp16, var_3816_cast_fp16, var_3818_cast_fp16, var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16, var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16, var_3836_cast_fp16))[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<string, []> var_3845_pad_type_0 = const()[name = tensor<string, []>("op_3845_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3845_strides_0 = const()[name = tensor<string, []>("op_3845_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3845_pad_0 = const()[name = tensor<string, []>("op_3845_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3845_dilations_0 = const()[name = tensor<string, []>("op_3845_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3845_groups_0 = const()[name = tensor<string, []>("op_3845_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(535724032)))];
+            tensor<fp16, [1280]> blocks_13_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539000896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3845_cast_fp16 = conv(bias = blocks_13_attn_out_bias_to_fp16, dilations = var_3845_dilations_0, groups = var_3845_groups_0, pad = var_3845_pad_0, pad_type = var_3845_pad_type_0, strides = var_3845_strides_0, weight = blocks_13_attn_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("op_3845_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = var_3845_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_137_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_137_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539003520)))];
+            tensor<fp16, [1280]> input_137_beta_0_to_fp16 = const()[name = tensor<string, []>("input_137_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539006144)))];
+            tensor<fp16, []> var_3855_to_fp16 = const()[name = tensor<string, []>("op_3855_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_3855_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_13_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539008768)))];
+            tensor<fp16, [5120]> blocks_13_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552116032)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_139_cast_fp16 = conv(bias = blocks_13_mlp_0_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = blocks_13_mlp_0_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<string, []> var_3881_pad_type_0 = const()[name = tensor<string, []>("op_3881_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3881_strides_0 = const()[name = tensor<string, []>("op_3881_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3881_pad_0 = const()[name = tensor<string, []>("op_3881_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3881_dilations_0 = const()[name = tensor<string, []>("op_3881_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3881_groups_0 = const()[name = tensor<string, []>("op_3881_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_13_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552126336)))];
+            tensor<fp16, [1280]> blocks_13_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565233600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3881_cast_fp16 = conv(bias = blocks_13_mlp_2_bias_to_fp16, dilations = var_3881_dilations_0, groups = var_3881_groups_0, pad = var_3881_pad_0, pad_type = var_3881_pad_type_0, strides = var_3881_strides_0, weight = blocks_13_mlp_2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("op_3881_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_3881_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, []> var_3890 = const()[name = tensor<string, []>("op_3890"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_143_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_143_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565236224)))];
+            tensor<fp16, [1280]> input_143_beta_0_to_fp16 = const()[name = tensor<string, []>("input_143_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565238848)))];
+            tensor<fp16, []> var_3906_to_fp16 = const()[name = tensor<string, []>("op_3906_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = input_143_beta_0_to_fp16, epsilon = var_3906_to_fp16, gamma = input_143_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<string, []> q_29_pad_type_0 = const()[name = tensor<string, []>("q_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_29_strides_0 = const()[name = tensor<string, []>("q_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_29_pad_0 = const()[name = tensor<string, []>("q_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_29_dilations_0 = const()[name = tensor<string, []>("q_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_29_groups_0 = const()[name = tensor<string, []>("q_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3941_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3941_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565241472)))];
+            tensor<fp16, [1280]> var_3941_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3941_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568518336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3941_cast_fp16 = conv(bias = var_3941_bias_0_to_fp16, dilations = q_29_dilations_0, groups = q_29_groups_0, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = q_29_strides_0, weight = var_3941_weight_0_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3941_cast_fp16")];
+            tensor<string, []> k_29_pad_type_0 = const()[name = tensor<string, []>("k_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_29_strides_0 = const()[name = tensor<string, []>("k_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_29_pad_0 = const()[name = tensor<string, []>("k_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_29_dilations_0 = const()[name = tensor<string, []>("k_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_29_groups_0 = const()[name = tensor<string, []>("k_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568520960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_29_cast_fp16 = conv(dilations = k_29_dilations_0, groups = k_29_groups_0, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = k_29_strides_0, weight = blocks_14_attn_key_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
+            tensor<string, []> var_3939_pad_type_0 = const()[name = tensor<string, []>("op_3939_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3939_strides_0 = const()[name = tensor<string, []>("op_3939_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3939_pad_0 = const()[name = tensor<string, []>("op_3939_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3939_dilations_0 = const()[name = tensor<string, []>("op_3939_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3939_groups_0 = const()[name = tensor<string, []>("op_3939_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(571797824)))];
+            tensor<fp16, [1280]> blocks_14_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575074688)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3939_cast_fp16 = conv(bias = blocks_14_attn_value_bias_to_fp16, dilations = var_3939_dilations_0, groups = var_3939_groups_0, pad = var_3939_pad_0, pad_type = var_3939_pad_type_0, strides = var_3939_strides_0, weight = blocks_14_attn_value_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3939_cast_fp16")];
+            tensor<int32, [20]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3942_axis_0 = const()[name = tensor<string, []>("op_3942_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_19 = split(axis = var_3942_axis_0, split_sizes = tile_42, x = var_3941_cast_fp16)[name = tensor<string, []>("op_3942_cast_fp16")];
+            tensor<int32, [4]> var_3963_perm_0 = const()[name = tensor<string, []>("op_3963_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3964_axis_0 = const()[name = tensor<string, []>("op_3964_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3963_cast_fp16 = transpose(perm = var_3963_perm_0, x = k_29_cast_fp16)[name = tensor<string, []>("transpose_18")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_19 = split(axis = var_3964_axis_0, split_sizes = tile_43, x = var_3963_cast_fp16)[name = tensor<string, []>("op_3964_cast_fp16")];
+            tensor<int32, [20]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3985_axis_0 = const()[name = tensor<string, []>("op_3985_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_19 = split(axis = var_3985_axis_0, split_sizes = tile_44, x = var_3939_cast_fp16)[name = tensor<string, []>("op_3985_cast_fp16")];
+            tensor<string, []> aw_561_equation_0 = const()[name = tensor<string, []>("aw_561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_561_cast_fp16 = einsum(equation = aw_561_equation_0, values = (var_3964_cast_fp16_0, var_3942_cast_fp16_0))[name = tensor<string, []>("aw_561_cast_fp16")];
+            tensor<string, []> aw_563_equation_0 = const()[name = tensor<string, []>("aw_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_563_cast_fp16 = einsum(equation = aw_563_equation_0, values = (var_3964_cast_fp16_1, var_3942_cast_fp16_1))[name = tensor<string, []>("aw_563_cast_fp16")];
+            tensor<string, []> aw_565_equation_0 = const()[name = tensor<string, []>("aw_565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_565_cast_fp16 = einsum(equation = aw_565_equation_0, values = (var_3964_cast_fp16_2, var_3942_cast_fp16_2))[name = tensor<string, []>("aw_565_cast_fp16")];
+            tensor<string, []> aw_567_equation_0 = const()[name = tensor<string, []>("aw_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_567_cast_fp16 = einsum(equation = aw_567_equation_0, values = (var_3964_cast_fp16_3, var_3942_cast_fp16_3))[name = tensor<string, []>("aw_567_cast_fp16")];
+            tensor<string, []> aw_569_equation_0 = const()[name = tensor<string, []>("aw_569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_569_cast_fp16 = einsum(equation = aw_569_equation_0, values = (var_3964_cast_fp16_4, var_3942_cast_fp16_4))[name = tensor<string, []>("aw_569_cast_fp16")];
+            tensor<string, []> aw_571_equation_0 = const()[name = tensor<string, []>("aw_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_571_cast_fp16 = einsum(equation = aw_571_equation_0, values = (var_3964_cast_fp16_5, var_3942_cast_fp16_5))[name = tensor<string, []>("aw_571_cast_fp16")];
+            tensor<string, []> aw_573_equation_0 = const()[name = tensor<string, []>("aw_573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_573_cast_fp16 = einsum(equation = aw_573_equation_0, values = (var_3964_cast_fp16_6, var_3942_cast_fp16_6))[name = tensor<string, []>("aw_573_cast_fp16")];
+            tensor<string, []> aw_575_equation_0 = const()[name = tensor<string, []>("aw_575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_575_cast_fp16 = einsum(equation = aw_575_equation_0, values = (var_3964_cast_fp16_7, var_3942_cast_fp16_7))[name = tensor<string, []>("aw_575_cast_fp16")];
+            tensor<string, []> aw_577_equation_0 = const()[name = tensor<string, []>("aw_577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_577_cast_fp16 = einsum(equation = aw_577_equation_0, values = (var_3964_cast_fp16_8, var_3942_cast_fp16_8))[name = tensor<string, []>("aw_577_cast_fp16")];
+            tensor<string, []> aw_579_equation_0 = const()[name = tensor<string, []>("aw_579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_579_cast_fp16 = einsum(equation = aw_579_equation_0, values = (var_3964_cast_fp16_9, var_3942_cast_fp16_9))[name = tensor<string, []>("aw_579_cast_fp16")];
+            tensor<string, []> aw_581_equation_0 = const()[name = tensor<string, []>("aw_581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_581_cast_fp16 = einsum(equation = aw_581_equation_0, values = (var_3964_cast_fp16_10, var_3942_cast_fp16_10))[name = tensor<string, []>("aw_581_cast_fp16")];
+            tensor<string, []> aw_583_equation_0 = const()[name = tensor<string, []>("aw_583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_583_cast_fp16 = einsum(equation = aw_583_equation_0, values = (var_3964_cast_fp16_11, var_3942_cast_fp16_11))[name = tensor<string, []>("aw_583_cast_fp16")];
+            tensor<string, []> aw_585_equation_0 = const()[name = tensor<string, []>("aw_585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_585_cast_fp16 = einsum(equation = aw_585_equation_0, values = (var_3964_cast_fp16_12, var_3942_cast_fp16_12))[name = tensor<string, []>("aw_585_cast_fp16")];
+            tensor<string, []> aw_587_equation_0 = const()[name = tensor<string, []>("aw_587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_587_cast_fp16 = einsum(equation = aw_587_equation_0, values = (var_3964_cast_fp16_13, var_3942_cast_fp16_13))[name = tensor<string, []>("aw_587_cast_fp16")];
+            tensor<string, []> aw_589_equation_0 = const()[name = tensor<string, []>("aw_589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_589_cast_fp16 = einsum(equation = aw_589_equation_0, values = (var_3964_cast_fp16_14, var_3942_cast_fp16_14))[name = tensor<string, []>("aw_589_cast_fp16")];
+            tensor<string, []> aw_591_equation_0 = const()[name = tensor<string, []>("aw_591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_591_cast_fp16 = einsum(equation = aw_591_equation_0, values = (var_3964_cast_fp16_15, var_3942_cast_fp16_15))[name = tensor<string, []>("aw_591_cast_fp16")];
+            tensor<string, []> aw_593_equation_0 = const()[name = tensor<string, []>("aw_593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_593_cast_fp16 = einsum(equation = aw_593_equation_0, values = (var_3964_cast_fp16_16, var_3942_cast_fp16_16))[name = tensor<string, []>("aw_593_cast_fp16")];
+            tensor<string, []> aw_595_equation_0 = const()[name = tensor<string, []>("aw_595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_595_cast_fp16 = einsum(equation = aw_595_equation_0, values = (var_3964_cast_fp16_17, var_3942_cast_fp16_17))[name = tensor<string, []>("aw_595_cast_fp16")];
+            tensor<string, []> aw_597_equation_0 = const()[name = tensor<string, []>("aw_597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_597_cast_fp16 = einsum(equation = aw_597_equation_0, values = (var_3964_cast_fp16_18, var_3942_cast_fp16_18))[name = tensor<string, []>("aw_597_cast_fp16")];
+            tensor<string, []> aw_599_equation_0 = const()[name = tensor<string, []>("aw_599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_599_cast_fp16 = einsum(equation = aw_599_equation_0, values = (var_3964_cast_fp16_19, var_3942_cast_fp16_19))[name = tensor<string, []>("aw_599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4046_cast_fp16 = softmax(axis = var_3890, x = aw_561_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4047_cast_fp16 = softmax(axis = var_3890, x = aw_563_cast_fp16)[name = tensor<string, []>("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4048_cast_fp16 = softmax(axis = var_3890, x = aw_565_cast_fp16)[name = tensor<string, []>("op_4048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4049_cast_fp16 = softmax(axis = var_3890, x = aw_567_cast_fp16)[name = tensor<string, []>("op_4049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4050_cast_fp16 = softmax(axis = var_3890, x = aw_569_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4051_cast_fp16 = softmax(axis = var_3890, x = aw_571_cast_fp16)[name = tensor<string, []>("op_4051_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4052_cast_fp16 = softmax(axis = var_3890, x = aw_573_cast_fp16)[name = tensor<string, []>("op_4052_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4053_cast_fp16 = softmax(axis = var_3890, x = aw_575_cast_fp16)[name = tensor<string, []>("op_4053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4054_cast_fp16 = softmax(axis = var_3890, x = aw_577_cast_fp16)[name = tensor<string, []>("op_4054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4055_cast_fp16 = softmax(axis = var_3890, x = aw_579_cast_fp16)[name = tensor<string, []>("op_4055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4056_cast_fp16 = softmax(axis = var_3890, x = aw_581_cast_fp16)[name = tensor<string, []>("op_4056_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4057_cast_fp16 = softmax(axis = var_3890, x = aw_583_cast_fp16)[name = tensor<string, []>("op_4057_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4058_cast_fp16 = softmax(axis = var_3890, x = aw_585_cast_fp16)[name = tensor<string, []>("op_4058_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4059_cast_fp16 = softmax(axis = var_3890, x = aw_587_cast_fp16)[name = tensor<string, []>("op_4059_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4060_cast_fp16 = softmax(axis = var_3890, x = aw_589_cast_fp16)[name = tensor<string, []>("op_4060_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4061_cast_fp16 = softmax(axis = var_3890, x = aw_591_cast_fp16)[name = tensor<string, []>("op_4061_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4062_cast_fp16 = softmax(axis = var_3890, x = aw_593_cast_fp16)[name = tensor<string, []>("op_4062_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4063_cast_fp16 = softmax(axis = var_3890, x = aw_595_cast_fp16)[name = tensor<string, []>("op_4063_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4064_cast_fp16 = softmax(axis = var_3890, x = aw_597_cast_fp16)[name = tensor<string, []>("op_4064_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4065_cast_fp16 = softmax(axis = var_3890, x = aw_599_cast_fp16)[name = tensor<string, []>("op_4065_cast_fp16")];
+            tensor<string, []> var_4067_equation_0 = const()[name = tensor<string, []>("op_4067_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3985_cast_fp16_0, var_4046_cast_fp16))[name = tensor<string, []>("op_4067_cast_fp16")];
+            tensor<string, []> var_4069_equation_0 = const()[name = tensor<string, []>("op_4069_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4069_cast_fp16 = einsum(equation = var_4069_equation_0, values = (var_3985_cast_fp16_1, var_4047_cast_fp16))[name = tensor<string, []>("op_4069_cast_fp16")];
+            tensor<string, []> var_4071_equation_0 = const()[name = tensor<string, []>("op_4071_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_3985_cast_fp16_2, var_4048_cast_fp16))[name = tensor<string, []>("op_4071_cast_fp16")];
+            tensor<string, []> var_4073_equation_0 = const()[name = tensor<string, []>("op_4073_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4073_cast_fp16 = einsum(equation = var_4073_equation_0, values = (var_3985_cast_fp16_3, var_4049_cast_fp16))[name = tensor<string, []>("op_4073_cast_fp16")];
+            tensor<string, []> var_4075_equation_0 = const()[name = tensor<string, []>("op_4075_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_3985_cast_fp16_4, var_4050_cast_fp16))[name = tensor<string, []>("op_4075_cast_fp16")];
+            tensor<string, []> var_4077_equation_0 = const()[name = tensor<string, []>("op_4077_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4077_cast_fp16 = einsum(equation = var_4077_equation_0, values = (var_3985_cast_fp16_5, var_4051_cast_fp16))[name = tensor<string, []>("op_4077_cast_fp16")];
+            tensor<string, []> var_4079_equation_0 = const()[name = tensor<string, []>("op_4079_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_3985_cast_fp16_6, var_4052_cast_fp16))[name = tensor<string, []>("op_4079_cast_fp16")];
+            tensor<string, []> var_4081_equation_0 = const()[name = tensor<string, []>("op_4081_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4081_cast_fp16 = einsum(equation = var_4081_equation_0, values = (var_3985_cast_fp16_7, var_4053_cast_fp16))[name = tensor<string, []>("op_4081_cast_fp16")];
+            tensor<string, []> var_4083_equation_0 = const()[name = tensor<string, []>("op_4083_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_3985_cast_fp16_8, var_4054_cast_fp16))[name = tensor<string, []>("op_4083_cast_fp16")];
+            tensor<string, []> var_4085_equation_0 = const()[name = tensor<string, []>("op_4085_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4085_cast_fp16 = einsum(equation = var_4085_equation_0, values = (var_3985_cast_fp16_9, var_4055_cast_fp16))[name = tensor<string, []>("op_4085_cast_fp16")];
+            tensor<string, []> var_4087_equation_0 = const()[name = tensor<string, []>("op_4087_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4087_cast_fp16 = einsum(equation = var_4087_equation_0, values = (var_3985_cast_fp16_10, var_4056_cast_fp16))[name = tensor<string, []>("op_4087_cast_fp16")];
+            tensor<string, []> var_4089_equation_0 = const()[name = tensor<string, []>("op_4089_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4089_cast_fp16 = einsum(equation = var_4089_equation_0, values = (var_3985_cast_fp16_11, var_4057_cast_fp16))[name = tensor<string, []>("op_4089_cast_fp16")];
+            tensor<string, []> var_4091_equation_0 = const()[name = tensor<string, []>("op_4091_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4091_cast_fp16 = einsum(equation = var_4091_equation_0, values = (var_3985_cast_fp16_12, var_4058_cast_fp16))[name = tensor<string, []>("op_4091_cast_fp16")];
+            tensor<string, []> var_4093_equation_0 = const()[name = tensor<string, []>("op_4093_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4093_cast_fp16 = einsum(equation = var_4093_equation_0, values = (var_3985_cast_fp16_13, var_4059_cast_fp16))[name = tensor<string, []>("op_4093_cast_fp16")];
+            tensor<string, []> var_4095_equation_0 = const()[name = tensor<string, []>("op_4095_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4095_cast_fp16 = einsum(equation = var_4095_equation_0, values = (var_3985_cast_fp16_14, var_4060_cast_fp16))[name = tensor<string, []>("op_4095_cast_fp16")];
+            tensor<string, []> var_4097_equation_0 = const()[name = tensor<string, []>("op_4097_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4097_cast_fp16 = einsum(equation = var_4097_equation_0, values = (var_3985_cast_fp16_15, var_4061_cast_fp16))[name = tensor<string, []>("op_4097_cast_fp16")];
+            tensor<string, []> var_4099_equation_0 = const()[name = tensor<string, []>("op_4099_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4099_cast_fp16 = einsum(equation = var_4099_equation_0, values = (var_3985_cast_fp16_16, var_4062_cast_fp16))[name = tensor<string, []>("op_4099_cast_fp16")];
+            tensor<string, []> var_4101_equation_0 = const()[name = tensor<string, []>("op_4101_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4101_cast_fp16 = einsum(equation = var_4101_equation_0, values = (var_3985_cast_fp16_17, var_4063_cast_fp16))[name = tensor<string, []>("op_4101_cast_fp16")];
+            tensor<string, []> var_4103_equation_0 = const()[name = tensor<string, []>("op_4103_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4103_cast_fp16 = einsum(equation = var_4103_equation_0, values = (var_3985_cast_fp16_18, var_4064_cast_fp16))[name = tensor<string, []>("op_4103_cast_fp16")];
+            tensor<string, []> var_4105_equation_0 = const()[name = tensor<string, []>("op_4105_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4105_cast_fp16 = einsum(equation = var_4105_equation_0, values = (var_3985_cast_fp16_19, var_4065_cast_fp16))[name = tensor<string, []>("op_4105_cast_fp16")];
+            tensor<bool, []> input_145_interleave_0 = const()[name = tensor<string, []>("input_145_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_145_cast_fp16 = concat(axis = var_3890, interleave = input_145_interleave_0, values = (var_4067_cast_fp16, var_4069_cast_fp16, var_4071_cast_fp16, var_4073_cast_fp16, var_4075_cast_fp16, var_4077_cast_fp16, var_4079_cast_fp16, var_4081_cast_fp16, var_4083_cast_fp16, var_4085_cast_fp16, var_4087_cast_fp16, var_4089_cast_fp16, var_4091_cast_fp16, var_4093_cast_fp16, var_4095_cast_fp16, var_4097_cast_fp16, var_4099_cast_fp16, var_4101_cast_fp16, var_4103_cast_fp16, var_4105_cast_fp16))[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<string, []> var_4114_pad_type_0 = const()[name = tensor<string, []>("op_4114_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4114_strides_0 = const()[name = tensor<string, []>("op_4114_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4114_pad_0 = const()[name = tensor<string, []>("op_4114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4114_dilations_0 = const()[name = tensor<string, []>("op_4114_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4114_groups_0 = const()[name = tensor<string, []>("op_4114_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575077312)))];
+            tensor<fp16, [1280]> blocks_14_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578354176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4114_cast_fp16 = conv(bias = blocks_14_attn_out_bias_to_fp16, dilations = var_4114_dilations_0, groups = var_4114_groups_0, pad = var_4114_pad_0, pad_type = var_4114_pad_type_0, strides = var_4114_strides_0, weight = blocks_14_attn_out_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("op_4114_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = var_4114_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> input_147_axes_0 = const()[name = tensor<string, []>("input_147_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_147_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578356800)))];
+            tensor<fp16, [1280]> input_147_beta_0_to_fp16 = const()[name = tensor<string, []>("input_147_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578359424)))];
+            tensor<fp16, []> var_4124_to_fp16 = const()[name = tensor<string, []>("op_4124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_147_cast_fp16 = layer_norm(axes = input_147_axes_0, beta = input_147_beta_0_to_fp16, epsilon = var_4124_to_fp16, gamma = input_147_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<string, []> input_149_pad_type_0 = const()[name = tensor<string, []>("input_149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_149_strides_0 = const()[name = tensor<string, []>("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_149_pad_0 = const()[name = tensor<string, []>("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_149_dilations_0 = const()[name = tensor<string, []>("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_149_groups_0 = const()[name = tensor<string, []>("input_149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_14_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578362048)))];
+            tensor<fp16, [5120]> blocks_14_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591469312)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_149_cast_fp16 = conv(bias = blocks_14_mlp_0_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = blocks_14_mlp_0_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<string, []> input_151_mode_0 = const()[name = tensor<string, []>("input_151_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<string, []> var_4150_pad_type_0 = const()[name = tensor<string, []>("op_4150_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4150_strides_0 = const()[name = tensor<string, []>("op_4150_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4150_pad_0 = const()[name = tensor<string, []>("op_4150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4150_dilations_0 = const()[name = tensor<string, []>("op_4150_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4150_groups_0 = const()[name = tensor<string, []>("op_4150_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_14_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591479616)))];
+            tensor<fp16, [1280]> blocks_14_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604586880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4150_cast_fp16 = conv(bias = blocks_14_mlp_2_bias_to_fp16, dilations = var_4150_dilations_0, groups = var_4150_groups_0, pad = var_4150_pad_0, pad_type = var_4150_pad_type_0, strides = var_4150_strides_0, weight = blocks_14_mlp_2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("op_4150_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = var_4150_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_4159 = const()[name = tensor<string, []>("op_4159"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_153_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_153_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604589504)))];
+            tensor<fp16, [1280]> input_153_beta_0_to_fp16 = const()[name = tensor<string, []>("input_153_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604592128)))];
+            tensor<fp16, []> var_4175_to_fp16 = const()[name = tensor<string, []>("op_4175_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, beta = input_153_beta_0_to_fp16, epsilon = var_4175_to_fp16, gamma = input_153_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<string, []> q_31_pad_type_0 = const()[name = tensor<string, []>("q_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_31_strides_0 = const()[name = tensor<string, []>("q_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_31_pad_0 = const()[name = tensor<string, []>("q_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_31_dilations_0 = const()[name = tensor<string, []>("q_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_31_groups_0 = const()[name = tensor<string, []>("q_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4210_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4210_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604594752)))];
+            tensor<fp16, [1280]> var_4210_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4210_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(607871616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4210_cast_fp16 = conv(bias = var_4210_bias_0_to_fp16, dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = var_4210_weight_0_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4210_cast_fp16")];
+            tensor<string, []> k_31_pad_type_0 = const()[name = tensor<string, []>("k_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_31_strides_0 = const()[name = tensor<string, []>("k_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_31_pad_0 = const()[name = tensor<string, []>("k_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_31_dilations_0 = const()[name = tensor<string, []>("k_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_31_groups_0 = const()[name = tensor<string, []>("k_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(607874240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_31_cast_fp16 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = blocks_15_attn_key_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("k_31_cast_fp16")];
+            tensor<string, []> var_4208_pad_type_0 = const()[name = tensor<string, []>("op_4208_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4208_strides_0 = const()[name = tensor<string, []>("op_4208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4208_pad_0 = const()[name = tensor<string, []>("op_4208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4208_dilations_0 = const()[name = tensor<string, []>("op_4208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4208_groups_0 = const()[name = tensor<string, []>("op_4208_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(611151104)))];
+            tensor<fp16, [1280]> blocks_15_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614427968)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4208_cast_fp16 = conv(bias = blocks_15_attn_value_bias_to_fp16, dilations = var_4208_dilations_0, groups = var_4208_groups_0, pad = var_4208_pad_0, pad_type = var_4208_pad_type_0, strides = var_4208_strides_0, weight = blocks_15_attn_value_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4208_cast_fp16")];
+            tensor<int32, [20]> tile_45 = const()[name = tensor<string, []>("tile_45"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4211_axis_0 = const()[name = tensor<string, []>("op_4211_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_19 = split(axis = var_4211_axis_0, split_sizes = tile_45, x = var_4210_cast_fp16)[name = tensor<string, []>("op_4211_cast_fp16")];
+            tensor<int32, [4]> var_4232_perm_0 = const()[name = tensor<string, []>("op_4232_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_46 = const()[name = tensor<string, []>("tile_46"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4233_axis_0 = const()[name = tensor<string, []>("op_4233_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4232_cast_fp16 = transpose(perm = var_4232_perm_0, x = k_31_cast_fp16)[name = tensor<string, []>("transpose_17")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_19 = split(axis = var_4233_axis_0, split_sizes = tile_46, x = var_4232_cast_fp16)[name = tensor<string, []>("op_4233_cast_fp16")];
+            tensor<int32, [20]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4254_axis_0 = const()[name = tensor<string, []>("op_4254_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_19 = split(axis = var_4254_axis_0, split_sizes = tile_47, x = var_4208_cast_fp16)[name = tensor<string, []>("op_4254_cast_fp16")];
+            tensor<string, []> aw_601_equation_0 = const()[name = tensor<string, []>("aw_601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_601_cast_fp16 = einsum(equation = aw_601_equation_0, values = (var_4233_cast_fp16_0, var_4211_cast_fp16_0))[name = tensor<string, []>("aw_601_cast_fp16")];
+            tensor<string, []> aw_603_equation_0 = const()[name = tensor<string, []>("aw_603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_603_cast_fp16 = einsum(equation = aw_603_equation_0, values = (var_4233_cast_fp16_1, var_4211_cast_fp16_1))[name = tensor<string, []>("aw_603_cast_fp16")];
+            tensor<string, []> aw_605_equation_0 = const()[name = tensor<string, []>("aw_605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_605_cast_fp16 = einsum(equation = aw_605_equation_0, values = (var_4233_cast_fp16_2, var_4211_cast_fp16_2))[name = tensor<string, []>("aw_605_cast_fp16")];
+            tensor<string, []> aw_607_equation_0 = const()[name = tensor<string, []>("aw_607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_607_cast_fp16 = einsum(equation = aw_607_equation_0, values = (var_4233_cast_fp16_3, var_4211_cast_fp16_3))[name = tensor<string, []>("aw_607_cast_fp16")];
+            tensor<string, []> aw_609_equation_0 = const()[name = tensor<string, []>("aw_609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_609_cast_fp16 = einsum(equation = aw_609_equation_0, values = (var_4233_cast_fp16_4, var_4211_cast_fp16_4))[name = tensor<string, []>("aw_609_cast_fp16")];
+            tensor<string, []> aw_611_equation_0 = const()[name = tensor<string, []>("aw_611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_611_cast_fp16 = einsum(equation = aw_611_equation_0, values = (var_4233_cast_fp16_5, var_4211_cast_fp16_5))[name = tensor<string, []>("aw_611_cast_fp16")];
+            tensor<string, []> aw_613_equation_0 = const()[name = tensor<string, []>("aw_613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_613_cast_fp16 = einsum(equation = aw_613_equation_0, values = (var_4233_cast_fp16_6, var_4211_cast_fp16_6))[name = tensor<string, []>("aw_613_cast_fp16")];
+            tensor<string, []> aw_615_equation_0 = const()[name = tensor<string, []>("aw_615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_615_cast_fp16 = einsum(equation = aw_615_equation_0, values = (var_4233_cast_fp16_7, var_4211_cast_fp16_7))[name = tensor<string, []>("aw_615_cast_fp16")];
+            tensor<string, []> aw_617_equation_0 = const()[name = tensor<string, []>("aw_617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_617_cast_fp16 = einsum(equation = aw_617_equation_0, values = (var_4233_cast_fp16_8, var_4211_cast_fp16_8))[name = tensor<string, []>("aw_617_cast_fp16")];
+            tensor<string, []> aw_619_equation_0 = const()[name = tensor<string, []>("aw_619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_619_cast_fp16 = einsum(equation = aw_619_equation_0, values = (var_4233_cast_fp16_9, var_4211_cast_fp16_9))[name = tensor<string, []>("aw_619_cast_fp16")];
+            tensor<string, []> aw_621_equation_0 = const()[name = tensor<string, []>("aw_621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_621_cast_fp16 = einsum(equation = aw_621_equation_0, values = (var_4233_cast_fp16_10, var_4211_cast_fp16_10))[name = tensor<string, []>("aw_621_cast_fp16")];
+            tensor<string, []> aw_623_equation_0 = const()[name = tensor<string, []>("aw_623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_623_cast_fp16 = einsum(equation = aw_623_equation_0, values = (var_4233_cast_fp16_11, var_4211_cast_fp16_11))[name = tensor<string, []>("aw_623_cast_fp16")];
+            tensor<string, []> aw_625_equation_0 = const()[name = tensor<string, []>("aw_625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_625_cast_fp16 = einsum(equation = aw_625_equation_0, values = (var_4233_cast_fp16_12, var_4211_cast_fp16_12))[name = tensor<string, []>("aw_625_cast_fp16")];
+            tensor<string, []> aw_627_equation_0 = const()[name = tensor<string, []>("aw_627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_627_cast_fp16 = einsum(equation = aw_627_equation_0, values = (var_4233_cast_fp16_13, var_4211_cast_fp16_13))[name = tensor<string, []>("aw_627_cast_fp16")];
+            tensor<string, []> aw_629_equation_0 = const()[name = tensor<string, []>("aw_629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_629_cast_fp16 = einsum(equation = aw_629_equation_0, values = (var_4233_cast_fp16_14, var_4211_cast_fp16_14))[name = tensor<string, []>("aw_629_cast_fp16")];
+            tensor<string, []> aw_631_equation_0 = const()[name = tensor<string, []>("aw_631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_631_cast_fp16 = einsum(equation = aw_631_equation_0, values = (var_4233_cast_fp16_15, var_4211_cast_fp16_15))[name = tensor<string, []>("aw_631_cast_fp16")];
+            tensor<string, []> aw_633_equation_0 = const()[name = tensor<string, []>("aw_633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_633_cast_fp16 = einsum(equation = aw_633_equation_0, values = (var_4233_cast_fp16_16, var_4211_cast_fp16_16))[name = tensor<string, []>("aw_633_cast_fp16")];
+            tensor<string, []> aw_635_equation_0 = const()[name = tensor<string, []>("aw_635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_635_cast_fp16 = einsum(equation = aw_635_equation_0, values = (var_4233_cast_fp16_17, var_4211_cast_fp16_17))[name = tensor<string, []>("aw_635_cast_fp16")];
+            tensor<string, []> aw_637_equation_0 = const()[name = tensor<string, []>("aw_637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_637_cast_fp16 = einsum(equation = aw_637_equation_0, values = (var_4233_cast_fp16_18, var_4211_cast_fp16_18))[name = tensor<string, []>("aw_637_cast_fp16")];
+            tensor<string, []> aw_639_equation_0 = const()[name = tensor<string, []>("aw_639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_639_cast_fp16 = einsum(equation = aw_639_equation_0, values = (var_4233_cast_fp16_19, var_4211_cast_fp16_19))[name = tensor<string, []>("aw_639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4315_cast_fp16 = softmax(axis = var_4159, x = aw_601_cast_fp16)[name = tensor<string, []>("op_4315_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4316_cast_fp16 = softmax(axis = var_4159, x = aw_603_cast_fp16)[name = tensor<string, []>("op_4316_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4317_cast_fp16 = softmax(axis = var_4159, x = aw_605_cast_fp16)[name = tensor<string, []>("op_4317_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4318_cast_fp16 = softmax(axis = var_4159, x = aw_607_cast_fp16)[name = tensor<string, []>("op_4318_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4319_cast_fp16 = softmax(axis = var_4159, x = aw_609_cast_fp16)[name = tensor<string, []>("op_4319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4320_cast_fp16 = softmax(axis = var_4159, x = aw_611_cast_fp16)[name = tensor<string, []>("op_4320_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4321_cast_fp16 = softmax(axis = var_4159, x = aw_613_cast_fp16)[name = tensor<string, []>("op_4321_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4322_cast_fp16 = softmax(axis = var_4159, x = aw_615_cast_fp16)[name = tensor<string, []>("op_4322_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4323_cast_fp16 = softmax(axis = var_4159, x = aw_617_cast_fp16)[name = tensor<string, []>("op_4323_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4324_cast_fp16 = softmax(axis = var_4159, x = aw_619_cast_fp16)[name = tensor<string, []>("op_4324_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4325_cast_fp16 = softmax(axis = var_4159, x = aw_621_cast_fp16)[name = tensor<string, []>("op_4325_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4326_cast_fp16 = softmax(axis = var_4159, x = aw_623_cast_fp16)[name = tensor<string, []>("op_4326_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4327_cast_fp16 = softmax(axis = var_4159, x = aw_625_cast_fp16)[name = tensor<string, []>("op_4327_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4328_cast_fp16 = softmax(axis = var_4159, x = aw_627_cast_fp16)[name = tensor<string, []>("op_4328_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4329_cast_fp16 = softmax(axis = var_4159, x = aw_629_cast_fp16)[name = tensor<string, []>("op_4329_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4330_cast_fp16 = softmax(axis = var_4159, x = aw_631_cast_fp16)[name = tensor<string, []>("op_4330_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4331_cast_fp16 = softmax(axis = var_4159, x = aw_633_cast_fp16)[name = tensor<string, []>("op_4331_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4332_cast_fp16 = softmax(axis = var_4159, x = aw_635_cast_fp16)[name = tensor<string, []>("op_4332_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4333_cast_fp16 = softmax(axis = var_4159, x = aw_637_cast_fp16)[name = tensor<string, []>("op_4333_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4334_cast_fp16 = softmax(axis = var_4159, x = aw_639_cast_fp16)[name = tensor<string, []>("op_4334_cast_fp16")];
+            tensor<string, []> var_4336_equation_0 = const()[name = tensor<string, []>("op_4336_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4336_cast_fp16 = einsum(equation = var_4336_equation_0, values = (var_4254_cast_fp16_0, var_4315_cast_fp16))[name = tensor<string, []>("op_4336_cast_fp16")];
+            tensor<string, []> var_4338_equation_0 = const()[name = tensor<string, []>("op_4338_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4338_cast_fp16 = einsum(equation = var_4338_equation_0, values = (var_4254_cast_fp16_1, var_4316_cast_fp16))[name = tensor<string, []>("op_4338_cast_fp16")];
+            tensor<string, []> var_4340_equation_0 = const()[name = tensor<string, []>("op_4340_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4340_cast_fp16 = einsum(equation = var_4340_equation_0, values = (var_4254_cast_fp16_2, var_4317_cast_fp16))[name = tensor<string, []>("op_4340_cast_fp16")];
+            tensor<string, []> var_4342_equation_0 = const()[name = tensor<string, []>("op_4342_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4342_cast_fp16 = einsum(equation = var_4342_equation_0, values = (var_4254_cast_fp16_3, var_4318_cast_fp16))[name = tensor<string, []>("op_4342_cast_fp16")];
+            tensor<string, []> var_4344_equation_0 = const()[name = tensor<string, []>("op_4344_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4344_cast_fp16 = einsum(equation = var_4344_equation_0, values = (var_4254_cast_fp16_4, var_4319_cast_fp16))[name = tensor<string, []>("op_4344_cast_fp16")];
+            tensor<string, []> var_4346_equation_0 = const()[name = tensor<string, []>("op_4346_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4346_cast_fp16 = einsum(equation = var_4346_equation_0, values = (var_4254_cast_fp16_5, var_4320_cast_fp16))[name = tensor<string, []>("op_4346_cast_fp16")];
+            tensor<string, []> var_4348_equation_0 = const()[name = tensor<string, []>("op_4348_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4348_cast_fp16 = einsum(equation = var_4348_equation_0, values = (var_4254_cast_fp16_6, var_4321_cast_fp16))[name = tensor<string, []>("op_4348_cast_fp16")];
+            tensor<string, []> var_4350_equation_0 = const()[name = tensor<string, []>("op_4350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4350_cast_fp16 = einsum(equation = var_4350_equation_0, values = (var_4254_cast_fp16_7, var_4322_cast_fp16))[name = tensor<string, []>("op_4350_cast_fp16")];
+            tensor<string, []> var_4352_equation_0 = const()[name = tensor<string, []>("op_4352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4352_cast_fp16 = einsum(equation = var_4352_equation_0, values = (var_4254_cast_fp16_8, var_4323_cast_fp16))[name = tensor<string, []>("op_4352_cast_fp16")];
+            tensor<string, []> var_4354_equation_0 = const()[name = tensor<string, []>("op_4354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4354_cast_fp16 = einsum(equation = var_4354_equation_0, values = (var_4254_cast_fp16_9, var_4324_cast_fp16))[name = tensor<string, []>("op_4354_cast_fp16")];
+            tensor<string, []> var_4356_equation_0 = const()[name = tensor<string, []>("op_4356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4356_cast_fp16 = einsum(equation = var_4356_equation_0, values = (var_4254_cast_fp16_10, var_4325_cast_fp16))[name = tensor<string, []>("op_4356_cast_fp16")];
+            tensor<string, []> var_4358_equation_0 = const()[name = tensor<string, []>("op_4358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4358_cast_fp16 = einsum(equation = var_4358_equation_0, values = (var_4254_cast_fp16_11, var_4326_cast_fp16))[name = tensor<string, []>("op_4358_cast_fp16")];
+            tensor<string, []> var_4360_equation_0 = const()[name = tensor<string, []>("op_4360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4360_cast_fp16 = einsum(equation = var_4360_equation_0, values = (var_4254_cast_fp16_12, var_4327_cast_fp16))[name = tensor<string, []>("op_4360_cast_fp16")];
+            tensor<string, []> var_4362_equation_0 = const()[name = tensor<string, []>("op_4362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4362_cast_fp16 = einsum(equation = var_4362_equation_0, values = (var_4254_cast_fp16_13, var_4328_cast_fp16))[name = tensor<string, []>("op_4362_cast_fp16")];
+            tensor<string, []> var_4364_equation_0 = const()[name = tensor<string, []>("op_4364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4364_cast_fp16 = einsum(equation = var_4364_equation_0, values = (var_4254_cast_fp16_14, var_4329_cast_fp16))[name = tensor<string, []>("op_4364_cast_fp16")];
+            tensor<string, []> var_4366_equation_0 = const()[name = tensor<string, []>("op_4366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4366_cast_fp16 = einsum(equation = var_4366_equation_0, values = (var_4254_cast_fp16_15, var_4330_cast_fp16))[name = tensor<string, []>("op_4366_cast_fp16")];
+            tensor<string, []> var_4368_equation_0 = const()[name = tensor<string, []>("op_4368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4368_cast_fp16 = einsum(equation = var_4368_equation_0, values = (var_4254_cast_fp16_16, var_4331_cast_fp16))[name = tensor<string, []>("op_4368_cast_fp16")];
+            tensor<string, []> var_4370_equation_0 = const()[name = tensor<string, []>("op_4370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4370_cast_fp16 = einsum(equation = var_4370_equation_0, values = (var_4254_cast_fp16_17, var_4332_cast_fp16))[name = tensor<string, []>("op_4370_cast_fp16")];
+            tensor<string, []> var_4372_equation_0 = const()[name = tensor<string, []>("op_4372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4372_cast_fp16 = einsum(equation = var_4372_equation_0, values = (var_4254_cast_fp16_18, var_4333_cast_fp16))[name = tensor<string, []>("op_4372_cast_fp16")];
+            tensor<string, []> var_4374_equation_0 = const()[name = tensor<string, []>("op_4374_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4374_cast_fp16 = einsum(equation = var_4374_equation_0, values = (var_4254_cast_fp16_19, var_4334_cast_fp16))[name = tensor<string, []>("op_4374_cast_fp16")];
+            tensor<bool, []> input_155_interleave_0 = const()[name = tensor<string, []>("input_155_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_155_cast_fp16 = concat(axis = var_4159, interleave = input_155_interleave_0, values = (var_4336_cast_fp16, var_4338_cast_fp16, var_4340_cast_fp16, var_4342_cast_fp16, var_4344_cast_fp16, var_4346_cast_fp16, var_4348_cast_fp16, var_4350_cast_fp16, var_4352_cast_fp16, var_4354_cast_fp16, var_4356_cast_fp16, var_4358_cast_fp16, var_4360_cast_fp16, var_4362_cast_fp16, var_4364_cast_fp16, var_4366_cast_fp16, var_4368_cast_fp16, var_4370_cast_fp16, var_4372_cast_fp16, var_4374_cast_fp16))[name = tensor<string, []>("input_155_cast_fp16")];
+            tensor<string, []> var_4383_pad_type_0 = const()[name = tensor<string, []>("op_4383_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4383_strides_0 = const()[name = tensor<string, []>("op_4383_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4383_pad_0 = const()[name = tensor<string, []>("op_4383_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4383_dilations_0 = const()[name = tensor<string, []>("op_4383_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4383_groups_0 = const()[name = tensor<string, []>("op_4383_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614430592)))];
+            tensor<fp16, [1280]> blocks_15_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617707456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4383_cast_fp16 = conv(bias = blocks_15_attn_out_bias_to_fp16, dilations = var_4383_dilations_0, groups = var_4383_groups_0, pad = var_4383_pad_0, pad_type = var_4383_pad_type_0, strides = var_4383_strides_0, weight = blocks_15_attn_out_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("op_4383_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = var_4383_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> input_157_axes_0 = const()[name = tensor<string, []>("input_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_157_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_157_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617710080)))];
+            tensor<fp16, [1280]> input_157_beta_0_to_fp16 = const()[name = tensor<string, []>("input_157_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617712704)))];
+            tensor<fp16, []> var_4393_to_fp16 = const()[name = tensor<string, []>("op_4393_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_157_cast_fp16 = layer_norm(axes = input_157_axes_0, beta = input_157_beta_0_to_fp16, epsilon = var_4393_to_fp16, gamma = input_157_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
+            tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_15_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(617715328)))];
+            tensor<fp16, [5120]> blocks_15_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(630822592)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_159_cast_fp16 = conv(bias = blocks_15_mlp_0_bias_to_fp16, dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = blocks_15_mlp_0_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
+            tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
+            tensor<string, []> var_4419_pad_type_0 = const()[name = tensor<string, []>("op_4419_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4419_strides_0 = const()[name = tensor<string, []>("op_4419_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4419_pad_0 = const()[name = tensor<string, []>("op_4419_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4419_dilations_0 = const()[name = tensor<string, []>("op_4419_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4419_groups_0 = const()[name = tensor<string, []>("op_4419_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_15_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(630832896)))];
+            tensor<fp16, [1280]> blocks_15_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643940160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4419_cast_fp16 = conv(bias = blocks_15_mlp_2_bias_to_fp16, dilations = var_4419_dilations_0, groups = var_4419_groups_0, pad = var_4419_pad_0, pad_type = var_4419_pad_type_0, strides = var_4419_strides_0, weight = blocks_15_mlp_2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("op_4419_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_4419_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, []> var_4428 = const()[name = tensor<string, []>("op_4428"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643942784)))];
+            tensor<fp16, [1280]> input_163_beta_0_to_fp16 = const()[name = tensor<string, []>("input_163_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643945408)))];
+            tensor<fp16, []> var_4444_to_fp16 = const()[name = tensor<string, []>("op_4444_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_4444_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<string, []> q_33_pad_type_0 = const()[name = tensor<string, []>("q_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_33_strides_0 = const()[name = tensor<string, []>("q_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_33_pad_0 = const()[name = tensor<string, []>("q_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_33_dilations_0 = const()[name = tensor<string, []>("q_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_33_groups_0 = const()[name = tensor<string, []>("q_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4479_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4479_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(643948032)))];
+            tensor<fp16, [1280]> var_4479_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4479_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647224896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4479_cast_fp16 = conv(bias = var_4479_bias_0_to_fp16, dilations = q_33_dilations_0, groups = q_33_groups_0, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = q_33_strides_0, weight = var_4479_weight_0_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4479_cast_fp16")];
+            tensor<string, []> k_33_pad_type_0 = const()[name = tensor<string, []>("k_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_33_strides_0 = const()[name = tensor<string, []>("k_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_33_pad_0 = const()[name = tensor<string, []>("k_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_33_dilations_0 = const()[name = tensor<string, []>("k_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_33_groups_0 = const()[name = tensor<string, []>("k_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647227520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_33_cast_fp16 = conv(dilations = k_33_dilations_0, groups = k_33_groups_0, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = k_33_strides_0, weight = blocks_16_attn_key_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
+            tensor<string, []> var_4477_pad_type_0 = const()[name = tensor<string, []>("op_4477_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4477_strides_0 = const()[name = tensor<string, []>("op_4477_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4477_pad_0 = const()[name = tensor<string, []>("op_4477_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4477_dilations_0 = const()[name = tensor<string, []>("op_4477_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4477_groups_0 = const()[name = tensor<string, []>("op_4477_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(650504384)))];
+            tensor<fp16, [1280]> blocks_16_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(653781248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4477_cast_fp16 = conv(bias = blocks_16_attn_value_bias_to_fp16, dilations = var_4477_dilations_0, groups = var_4477_groups_0, pad = var_4477_pad_0, pad_type = var_4477_pad_type_0, strides = var_4477_strides_0, weight = blocks_16_attn_value_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4477_cast_fp16")];
+            tensor<int32, [20]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4480_axis_0 = const()[name = tensor<string, []>("op_4480_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_19 = split(axis = var_4480_axis_0, split_sizes = tile_48, x = var_4479_cast_fp16)[name = tensor<string, []>("op_4480_cast_fp16")];
+            tensor<int32, [4]> var_4501_perm_0 = const()[name = tensor<string, []>("op_4501_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4502_axis_0 = const()[name = tensor<string, []>("op_4502_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4501_cast_fp16 = transpose(perm = var_4501_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_16")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_19 = split(axis = var_4502_axis_0, split_sizes = tile_49, x = var_4501_cast_fp16)[name = tensor<string, []>("op_4502_cast_fp16")];
+            tensor<int32, [20]> tile_50 = const()[name = tensor<string, []>("tile_50"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4523_axis_0 = const()[name = tensor<string, []>("op_4523_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_19 = split(axis = var_4523_axis_0, split_sizes = tile_50, x = var_4477_cast_fp16)[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<string, []> aw_641_equation_0 = const()[name = tensor<string, []>("aw_641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_641_cast_fp16 = einsum(equation = aw_641_equation_0, values = (var_4502_cast_fp16_0, var_4480_cast_fp16_0))[name = tensor<string, []>("aw_641_cast_fp16")];
+            tensor<string, []> aw_643_equation_0 = const()[name = tensor<string, []>("aw_643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_643_cast_fp16 = einsum(equation = aw_643_equation_0, values = (var_4502_cast_fp16_1, var_4480_cast_fp16_1))[name = tensor<string, []>("aw_643_cast_fp16")];
+            tensor<string, []> aw_645_equation_0 = const()[name = tensor<string, []>("aw_645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_645_cast_fp16 = einsum(equation = aw_645_equation_0, values = (var_4502_cast_fp16_2, var_4480_cast_fp16_2))[name = tensor<string, []>("aw_645_cast_fp16")];
+            tensor<string, []> aw_647_equation_0 = const()[name = tensor<string, []>("aw_647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_647_cast_fp16 = einsum(equation = aw_647_equation_0, values = (var_4502_cast_fp16_3, var_4480_cast_fp16_3))[name = tensor<string, []>("aw_647_cast_fp16")];
+            tensor<string, []> aw_649_equation_0 = const()[name = tensor<string, []>("aw_649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_649_cast_fp16 = einsum(equation = aw_649_equation_0, values = (var_4502_cast_fp16_4, var_4480_cast_fp16_4))[name = tensor<string, []>("aw_649_cast_fp16")];
+            tensor<string, []> aw_651_equation_0 = const()[name = tensor<string, []>("aw_651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_651_cast_fp16 = einsum(equation = aw_651_equation_0, values = (var_4502_cast_fp16_5, var_4480_cast_fp16_5))[name = tensor<string, []>("aw_651_cast_fp16")];
+            tensor<string, []> aw_653_equation_0 = const()[name = tensor<string, []>("aw_653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_653_cast_fp16 = einsum(equation = aw_653_equation_0, values = (var_4502_cast_fp16_6, var_4480_cast_fp16_6))[name = tensor<string, []>("aw_653_cast_fp16")];
+            tensor<string, []> aw_655_equation_0 = const()[name = tensor<string, []>("aw_655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_655_cast_fp16 = einsum(equation = aw_655_equation_0, values = (var_4502_cast_fp16_7, var_4480_cast_fp16_7))[name = tensor<string, []>("aw_655_cast_fp16")];
+            tensor<string, []> aw_657_equation_0 = const()[name = tensor<string, []>("aw_657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_657_cast_fp16 = einsum(equation = aw_657_equation_0, values = (var_4502_cast_fp16_8, var_4480_cast_fp16_8))[name = tensor<string, []>("aw_657_cast_fp16")];
+            tensor<string, []> aw_659_equation_0 = const()[name = tensor<string, []>("aw_659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_659_cast_fp16 = einsum(equation = aw_659_equation_0, values = (var_4502_cast_fp16_9, var_4480_cast_fp16_9))[name = tensor<string, []>("aw_659_cast_fp16")];
+            tensor<string, []> aw_661_equation_0 = const()[name = tensor<string, []>("aw_661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_661_cast_fp16 = einsum(equation = aw_661_equation_0, values = (var_4502_cast_fp16_10, var_4480_cast_fp16_10))[name = tensor<string, []>("aw_661_cast_fp16")];
+            tensor<string, []> aw_663_equation_0 = const()[name = tensor<string, []>("aw_663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_663_cast_fp16 = einsum(equation = aw_663_equation_0, values = (var_4502_cast_fp16_11, var_4480_cast_fp16_11))[name = tensor<string, []>("aw_663_cast_fp16")];
+            tensor<string, []> aw_665_equation_0 = const()[name = tensor<string, []>("aw_665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_665_cast_fp16 = einsum(equation = aw_665_equation_0, values = (var_4502_cast_fp16_12, var_4480_cast_fp16_12))[name = tensor<string, []>("aw_665_cast_fp16")];
+            tensor<string, []> aw_667_equation_0 = const()[name = tensor<string, []>("aw_667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_667_cast_fp16 = einsum(equation = aw_667_equation_0, values = (var_4502_cast_fp16_13, var_4480_cast_fp16_13))[name = tensor<string, []>("aw_667_cast_fp16")];
+            tensor<string, []> aw_669_equation_0 = const()[name = tensor<string, []>("aw_669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_669_cast_fp16 = einsum(equation = aw_669_equation_0, values = (var_4502_cast_fp16_14, var_4480_cast_fp16_14))[name = tensor<string, []>("aw_669_cast_fp16")];
+            tensor<string, []> aw_671_equation_0 = const()[name = tensor<string, []>("aw_671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_671_cast_fp16 = einsum(equation = aw_671_equation_0, values = (var_4502_cast_fp16_15, var_4480_cast_fp16_15))[name = tensor<string, []>("aw_671_cast_fp16")];
+            tensor<string, []> aw_673_equation_0 = const()[name = tensor<string, []>("aw_673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_673_cast_fp16 = einsum(equation = aw_673_equation_0, values = (var_4502_cast_fp16_16, var_4480_cast_fp16_16))[name = tensor<string, []>("aw_673_cast_fp16")];
+            tensor<string, []> aw_675_equation_0 = const()[name = tensor<string, []>("aw_675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_675_cast_fp16 = einsum(equation = aw_675_equation_0, values = (var_4502_cast_fp16_17, var_4480_cast_fp16_17))[name = tensor<string, []>("aw_675_cast_fp16")];
+            tensor<string, []> aw_677_equation_0 = const()[name = tensor<string, []>("aw_677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_677_cast_fp16 = einsum(equation = aw_677_equation_0, values = (var_4502_cast_fp16_18, var_4480_cast_fp16_18))[name = tensor<string, []>("aw_677_cast_fp16")];
+            tensor<string, []> aw_679_equation_0 = const()[name = tensor<string, []>("aw_679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_679_cast_fp16 = einsum(equation = aw_679_equation_0, values = (var_4502_cast_fp16_19, var_4480_cast_fp16_19))[name = tensor<string, []>("aw_679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4584_cast_fp16 = softmax(axis = var_4428, x = aw_641_cast_fp16)[name = tensor<string, []>("op_4584_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4585_cast_fp16 = softmax(axis = var_4428, x = aw_643_cast_fp16)[name = tensor<string, []>("op_4585_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4586_cast_fp16 = softmax(axis = var_4428, x = aw_645_cast_fp16)[name = tensor<string, []>("op_4586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4587_cast_fp16 = softmax(axis = var_4428, x = aw_647_cast_fp16)[name = tensor<string, []>("op_4587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4588_cast_fp16 = softmax(axis = var_4428, x = aw_649_cast_fp16)[name = tensor<string, []>("op_4588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4589_cast_fp16 = softmax(axis = var_4428, x = aw_651_cast_fp16)[name = tensor<string, []>("op_4589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4590_cast_fp16 = softmax(axis = var_4428, x = aw_653_cast_fp16)[name = tensor<string, []>("op_4590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4591_cast_fp16 = softmax(axis = var_4428, x = aw_655_cast_fp16)[name = tensor<string, []>("op_4591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4592_cast_fp16 = softmax(axis = var_4428, x = aw_657_cast_fp16)[name = tensor<string, []>("op_4592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4593_cast_fp16 = softmax(axis = var_4428, x = aw_659_cast_fp16)[name = tensor<string, []>("op_4593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4594_cast_fp16 = softmax(axis = var_4428, x = aw_661_cast_fp16)[name = tensor<string, []>("op_4594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4595_cast_fp16 = softmax(axis = var_4428, x = aw_663_cast_fp16)[name = tensor<string, []>("op_4595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4596_cast_fp16 = softmax(axis = var_4428, x = aw_665_cast_fp16)[name = tensor<string, []>("op_4596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4597_cast_fp16 = softmax(axis = var_4428, x = aw_667_cast_fp16)[name = tensor<string, []>("op_4597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4598_cast_fp16 = softmax(axis = var_4428, x = aw_669_cast_fp16)[name = tensor<string, []>("op_4598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4599_cast_fp16 = softmax(axis = var_4428, x = aw_671_cast_fp16)[name = tensor<string, []>("op_4599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4600_cast_fp16 = softmax(axis = var_4428, x = aw_673_cast_fp16)[name = tensor<string, []>("op_4600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4601_cast_fp16 = softmax(axis = var_4428, x = aw_675_cast_fp16)[name = tensor<string, []>("op_4601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4602_cast_fp16 = softmax(axis = var_4428, x = aw_677_cast_fp16)[name = tensor<string, []>("op_4602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4603_cast_fp16 = softmax(axis = var_4428, x = aw_679_cast_fp16)[name = tensor<string, []>("op_4603_cast_fp16")];
+            tensor<string, []> var_4605_equation_0 = const()[name = tensor<string, []>("op_4605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4605_cast_fp16 = einsum(equation = var_4605_equation_0, values = (var_4523_cast_fp16_0, var_4584_cast_fp16))[name = tensor<string, []>("op_4605_cast_fp16")];
+            tensor<string, []> var_4607_equation_0 = const()[name = tensor<string, []>("op_4607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4523_cast_fp16_1, var_4585_cast_fp16))[name = tensor<string, []>("op_4607_cast_fp16")];
+            tensor<string, []> var_4609_equation_0 = const()[name = tensor<string, []>("op_4609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4609_cast_fp16 = einsum(equation = var_4609_equation_0, values = (var_4523_cast_fp16_2, var_4586_cast_fp16))[name = tensor<string, []>("op_4609_cast_fp16")];
+            tensor<string, []> var_4611_equation_0 = const()[name = tensor<string, []>("op_4611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4523_cast_fp16_3, var_4587_cast_fp16))[name = tensor<string, []>("op_4611_cast_fp16")];
+            tensor<string, []> var_4613_equation_0 = const()[name = tensor<string, []>("op_4613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4613_cast_fp16 = einsum(equation = var_4613_equation_0, values = (var_4523_cast_fp16_4, var_4588_cast_fp16))[name = tensor<string, []>("op_4613_cast_fp16")];
+            tensor<string, []> var_4615_equation_0 = const()[name = tensor<string, []>("op_4615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4523_cast_fp16_5, var_4589_cast_fp16))[name = tensor<string, []>("op_4615_cast_fp16")];
+            tensor<string, []> var_4617_equation_0 = const()[name = tensor<string, []>("op_4617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4617_cast_fp16 = einsum(equation = var_4617_equation_0, values = (var_4523_cast_fp16_6, var_4590_cast_fp16))[name = tensor<string, []>("op_4617_cast_fp16")];
+            tensor<string, []> var_4619_equation_0 = const()[name = tensor<string, []>("op_4619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4523_cast_fp16_7, var_4591_cast_fp16))[name = tensor<string, []>("op_4619_cast_fp16")];
+            tensor<string, []> var_4621_equation_0 = const()[name = tensor<string, []>("op_4621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4621_cast_fp16 = einsum(equation = var_4621_equation_0, values = (var_4523_cast_fp16_8, var_4592_cast_fp16))[name = tensor<string, []>("op_4621_cast_fp16")];
+            tensor<string, []> var_4623_equation_0 = const()[name = tensor<string, []>("op_4623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4523_cast_fp16_9, var_4593_cast_fp16))[name = tensor<string, []>("op_4623_cast_fp16")];
+            tensor<string, []> var_4625_equation_0 = const()[name = tensor<string, []>("op_4625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4625_cast_fp16 = einsum(equation = var_4625_equation_0, values = (var_4523_cast_fp16_10, var_4594_cast_fp16))[name = tensor<string, []>("op_4625_cast_fp16")];
+            tensor<string, []> var_4627_equation_0 = const()[name = tensor<string, []>("op_4627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4523_cast_fp16_11, var_4595_cast_fp16))[name = tensor<string, []>("op_4627_cast_fp16")];
+            tensor<string, []> var_4629_equation_0 = const()[name = tensor<string, []>("op_4629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4629_cast_fp16 = einsum(equation = var_4629_equation_0, values = (var_4523_cast_fp16_12, var_4596_cast_fp16))[name = tensor<string, []>("op_4629_cast_fp16")];
+            tensor<string, []> var_4631_equation_0 = const()[name = tensor<string, []>("op_4631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4523_cast_fp16_13, var_4597_cast_fp16))[name = tensor<string, []>("op_4631_cast_fp16")];
+            tensor<string, []> var_4633_equation_0 = const()[name = tensor<string, []>("op_4633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4633_cast_fp16 = einsum(equation = var_4633_equation_0, values = (var_4523_cast_fp16_14, var_4598_cast_fp16))[name = tensor<string, []>("op_4633_cast_fp16")];
+            tensor<string, []> var_4635_equation_0 = const()[name = tensor<string, []>("op_4635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4523_cast_fp16_15, var_4599_cast_fp16))[name = tensor<string, []>("op_4635_cast_fp16")];
+            tensor<string, []> var_4637_equation_0 = const()[name = tensor<string, []>("op_4637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4637_cast_fp16 = einsum(equation = var_4637_equation_0, values = (var_4523_cast_fp16_16, var_4600_cast_fp16))[name = tensor<string, []>("op_4637_cast_fp16")];
+            tensor<string, []> var_4639_equation_0 = const()[name = tensor<string, []>("op_4639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4523_cast_fp16_17, var_4601_cast_fp16))[name = tensor<string, []>("op_4639_cast_fp16")];
+            tensor<string, []> var_4641_equation_0 = const()[name = tensor<string, []>("op_4641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4641_cast_fp16 = einsum(equation = var_4641_equation_0, values = (var_4523_cast_fp16_18, var_4602_cast_fp16))[name = tensor<string, []>("op_4641_cast_fp16")];
+            tensor<string, []> var_4643_equation_0 = const()[name = tensor<string, []>("op_4643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4523_cast_fp16_19, var_4603_cast_fp16))[name = tensor<string, []>("op_4643_cast_fp16")];
+            tensor<bool, []> input_165_interleave_0 = const()[name = tensor<string, []>("input_165_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_165_cast_fp16 = concat(axis = var_4428, interleave = input_165_interleave_0, values = (var_4605_cast_fp16, var_4607_cast_fp16, var_4609_cast_fp16, var_4611_cast_fp16, var_4613_cast_fp16, var_4615_cast_fp16, var_4617_cast_fp16, var_4619_cast_fp16, var_4621_cast_fp16, var_4623_cast_fp16, var_4625_cast_fp16, var_4627_cast_fp16, var_4629_cast_fp16, var_4631_cast_fp16, var_4633_cast_fp16, var_4635_cast_fp16, var_4637_cast_fp16, var_4639_cast_fp16, var_4641_cast_fp16, var_4643_cast_fp16))[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<string, []> var_4652_pad_type_0 = const()[name = tensor<string, []>("op_4652_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4652_strides_0 = const()[name = tensor<string, []>("op_4652_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4652_pad_0 = const()[name = tensor<string, []>("op_4652_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4652_dilations_0 = const()[name = tensor<string, []>("op_4652_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4652_groups_0 = const()[name = tensor<string, []>("op_4652_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(653783872)))];
+            tensor<fp16, [1280]> blocks_16_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657060736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4652_cast_fp16 = conv(bias = blocks_16_attn_out_bias_to_fp16, dilations = var_4652_dilations_0, groups = var_4652_groups_0, pad = var_4652_pad_0, pad_type = var_4652_pad_type_0, strides = var_4652_strides_0, weight = blocks_16_attn_out_weight_to_fp16, x = input_165_cast_fp16)[name = tensor<string, []>("op_4652_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = var_4652_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, [1]> input_167_axes_0 = const()[name = tensor<string, []>("input_167_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_167_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_167_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657063360)))];
+            tensor<fp16, [1280]> input_167_beta_0_to_fp16 = const()[name = tensor<string, []>("input_167_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657065984)))];
+            tensor<fp16, []> var_4662_to_fp16 = const()[name = tensor<string, []>("op_4662_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_167_cast_fp16 = layer_norm(axes = input_167_axes_0, beta = input_167_beta_0_to_fp16, epsilon = var_4662_to_fp16, gamma = input_167_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<string, []> input_169_pad_type_0 = const()[name = tensor<string, []>("input_169_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_169_strides_0 = const()[name = tensor<string, []>("input_169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_169_pad_0 = const()[name = tensor<string, []>("input_169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_169_dilations_0 = const()[name = tensor<string, []>("input_169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_169_groups_0 = const()[name = tensor<string, []>("input_169_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_16_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657068608)))];
+            tensor<fp16, [5120]> blocks_16_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670175872)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_169_cast_fp16 = conv(bias = blocks_16_mlp_0_bias_to_fp16, dilations = input_169_dilations_0, groups = input_169_groups_0, pad = input_169_pad_0, pad_type = input_169_pad_type_0, strides = input_169_strides_0, weight = blocks_16_mlp_0_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<string, []> input_171_mode_0 = const()[name = tensor<string, []>("input_171_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_171_cast_fp16 = gelu(mode = input_171_mode_0, x = input_169_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<string, []> var_4688_pad_type_0 = const()[name = tensor<string, []>("op_4688_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4688_strides_0 = const()[name = tensor<string, []>("op_4688_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4688_pad_0 = const()[name = tensor<string, []>("op_4688_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4688_dilations_0 = const()[name = tensor<string, []>("op_4688_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4688_groups_0 = const()[name = tensor<string, []>("op_4688_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_16_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670186176)))];
+            tensor<fp16, [1280]> blocks_16_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683293440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4688_cast_fp16 = conv(bias = blocks_16_mlp_2_bias_to_fp16, dilations = var_4688_dilations_0, groups = var_4688_groups_0, pad = var_4688_pad_0, pad_type = var_4688_pad_type_0, strides = var_4688_strides_0, weight = blocks_16_mlp_2_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("op_4688_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = var_4688_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, []> var_4697 = const()[name = tensor<string, []>("op_4697"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_173_axes_0 = const()[name = tensor<string, []>("input_173_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_173_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_173_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683296064)))];
+            tensor<fp16, [1280]> input_173_beta_0_to_fp16 = const()[name = tensor<string, []>("input_173_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683298688)))];
+            tensor<fp16, []> var_4713_to_fp16 = const()[name = tensor<string, []>("op_4713_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_173_cast_fp16 = layer_norm(axes = input_173_axes_0, beta = input_173_beta_0_to_fp16, epsilon = var_4713_to_fp16, gamma = input_173_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<string, []> q_35_pad_type_0 = const()[name = tensor<string, []>("q_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_35_strides_0 = const()[name = tensor<string, []>("q_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_35_pad_0 = const()[name = tensor<string, []>("q_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_35_dilations_0 = const()[name = tensor<string, []>("q_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_35_groups_0 = const()[name = tensor<string, []>("q_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4748_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4748_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683301312)))];
+            tensor<fp16, [1280]> var_4748_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4748_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686578176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4748_cast_fp16 = conv(bias = var_4748_bias_0_to_fp16, dilations = q_35_dilations_0, groups = q_35_groups_0, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = q_35_strides_0, weight = var_4748_weight_0_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<string, []> k_35_pad_type_0 = const()[name = tensor<string, []>("k_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_35_strides_0 = const()[name = tensor<string, []>("k_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_35_pad_0 = const()[name = tensor<string, []>("k_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_35_dilations_0 = const()[name = tensor<string, []>("k_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_35_groups_0 = const()[name = tensor<string, []>("k_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686580800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_35_cast_fp16 = conv(dilations = k_35_dilations_0, groups = k_35_groups_0, pad = k_35_pad_0, pad_type = k_35_pad_type_0, strides = k_35_strides_0, weight = blocks_17_attn_key_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("k_35_cast_fp16")];
+            tensor<string, []> var_4746_pad_type_0 = const()[name = tensor<string, []>("op_4746_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4746_strides_0 = const()[name = tensor<string, []>("op_4746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4746_pad_0 = const()[name = tensor<string, []>("op_4746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4746_dilations_0 = const()[name = tensor<string, []>("op_4746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4746_groups_0 = const()[name = tensor<string, []>("op_4746_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(689857664)))];
+            tensor<fp16, [1280]> blocks_17_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693134528)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4746_cast_fp16 = conv(bias = blocks_17_attn_value_bias_to_fp16, dilations = var_4746_dilations_0, groups = var_4746_groups_0, pad = var_4746_pad_0, pad_type = var_4746_pad_type_0, strides = var_4746_strides_0, weight = blocks_17_attn_value_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4746_cast_fp16")];
+            tensor<int32, [20]> tile_51 = const()[name = tensor<string, []>("tile_51"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4749_axis_0 = const()[name = tensor<string, []>("op_4749_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_19 = split(axis = var_4749_axis_0, split_sizes = tile_51, x = var_4748_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<int32, [4]> var_4770_perm_0 = const()[name = tensor<string, []>("op_4770_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4771_axis_0 = const()[name = tensor<string, []>("op_4771_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4770_cast_fp16 = transpose(perm = var_4770_perm_0, x = k_35_cast_fp16)[name = tensor<string, []>("transpose_15")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_19 = split(axis = var_4771_axis_0, split_sizes = tile_52, x = var_4770_cast_fp16)[name = tensor<string, []>("op_4771_cast_fp16")];
+            tensor<int32, [20]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4792_axis_0 = const()[name = tensor<string, []>("op_4792_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_19 = split(axis = var_4792_axis_0, split_sizes = tile_53, x = var_4746_cast_fp16)[name = tensor<string, []>("op_4792_cast_fp16")];
+            tensor<string, []> aw_681_equation_0 = const()[name = tensor<string, []>("aw_681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_681_cast_fp16 = einsum(equation = aw_681_equation_0, values = (var_4771_cast_fp16_0, var_4749_cast_fp16_0))[name = tensor<string, []>("aw_681_cast_fp16")];
+            tensor<string, []> aw_683_equation_0 = const()[name = tensor<string, []>("aw_683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_683_cast_fp16 = einsum(equation = aw_683_equation_0, values = (var_4771_cast_fp16_1, var_4749_cast_fp16_1))[name = tensor<string, []>("aw_683_cast_fp16")];
+            tensor<string, []> aw_685_equation_0 = const()[name = tensor<string, []>("aw_685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_685_cast_fp16 = einsum(equation = aw_685_equation_0, values = (var_4771_cast_fp16_2, var_4749_cast_fp16_2))[name = tensor<string, []>("aw_685_cast_fp16")];
+            tensor<string, []> aw_687_equation_0 = const()[name = tensor<string, []>("aw_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_687_cast_fp16 = einsum(equation = aw_687_equation_0, values = (var_4771_cast_fp16_3, var_4749_cast_fp16_3))[name = tensor<string, []>("aw_687_cast_fp16")];
+            tensor<string, []> aw_689_equation_0 = const()[name = tensor<string, []>("aw_689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_689_cast_fp16 = einsum(equation = aw_689_equation_0, values = (var_4771_cast_fp16_4, var_4749_cast_fp16_4))[name = tensor<string, []>("aw_689_cast_fp16")];
+            tensor<string, []> aw_691_equation_0 = const()[name = tensor<string, []>("aw_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_691_cast_fp16 = einsum(equation = aw_691_equation_0, values = (var_4771_cast_fp16_5, var_4749_cast_fp16_5))[name = tensor<string, []>("aw_691_cast_fp16")];
+            tensor<string, []> aw_693_equation_0 = const()[name = tensor<string, []>("aw_693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_693_cast_fp16 = einsum(equation = aw_693_equation_0, values = (var_4771_cast_fp16_6, var_4749_cast_fp16_6))[name = tensor<string, []>("aw_693_cast_fp16")];
+            tensor<string, []> aw_695_equation_0 = const()[name = tensor<string, []>("aw_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_695_cast_fp16 = einsum(equation = aw_695_equation_0, values = (var_4771_cast_fp16_7, var_4749_cast_fp16_7))[name = tensor<string, []>("aw_695_cast_fp16")];
+            tensor<string, []> aw_697_equation_0 = const()[name = tensor<string, []>("aw_697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_697_cast_fp16 = einsum(equation = aw_697_equation_0, values = (var_4771_cast_fp16_8, var_4749_cast_fp16_8))[name = tensor<string, []>("aw_697_cast_fp16")];
+            tensor<string, []> aw_699_equation_0 = const()[name = tensor<string, []>("aw_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_699_cast_fp16 = einsum(equation = aw_699_equation_0, values = (var_4771_cast_fp16_9, var_4749_cast_fp16_9))[name = tensor<string, []>("aw_699_cast_fp16")];
+            tensor<string, []> aw_701_equation_0 = const()[name = tensor<string, []>("aw_701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_701_cast_fp16 = einsum(equation = aw_701_equation_0, values = (var_4771_cast_fp16_10, var_4749_cast_fp16_10))[name = tensor<string, []>("aw_701_cast_fp16")];
+            tensor<string, []> aw_703_equation_0 = const()[name = tensor<string, []>("aw_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_703_cast_fp16 = einsum(equation = aw_703_equation_0, values = (var_4771_cast_fp16_11, var_4749_cast_fp16_11))[name = tensor<string, []>("aw_703_cast_fp16")];
+            tensor<string, []> aw_705_equation_0 = const()[name = tensor<string, []>("aw_705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_705_cast_fp16 = einsum(equation = aw_705_equation_0, values = (var_4771_cast_fp16_12, var_4749_cast_fp16_12))[name = tensor<string, []>("aw_705_cast_fp16")];
+            tensor<string, []> aw_707_equation_0 = const()[name = tensor<string, []>("aw_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_707_cast_fp16 = einsum(equation = aw_707_equation_0, values = (var_4771_cast_fp16_13, var_4749_cast_fp16_13))[name = tensor<string, []>("aw_707_cast_fp16")];
+            tensor<string, []> aw_709_equation_0 = const()[name = tensor<string, []>("aw_709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_709_cast_fp16 = einsum(equation = aw_709_equation_0, values = (var_4771_cast_fp16_14, var_4749_cast_fp16_14))[name = tensor<string, []>("aw_709_cast_fp16")];
+            tensor<string, []> aw_711_equation_0 = const()[name = tensor<string, []>("aw_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_711_cast_fp16 = einsum(equation = aw_711_equation_0, values = (var_4771_cast_fp16_15, var_4749_cast_fp16_15))[name = tensor<string, []>("aw_711_cast_fp16")];
+            tensor<string, []> aw_713_equation_0 = const()[name = tensor<string, []>("aw_713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_713_cast_fp16 = einsum(equation = aw_713_equation_0, values = (var_4771_cast_fp16_16, var_4749_cast_fp16_16))[name = tensor<string, []>("aw_713_cast_fp16")];
+            tensor<string, []> aw_715_equation_0 = const()[name = tensor<string, []>("aw_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_715_cast_fp16 = einsum(equation = aw_715_equation_0, values = (var_4771_cast_fp16_17, var_4749_cast_fp16_17))[name = tensor<string, []>("aw_715_cast_fp16")];
+            tensor<string, []> aw_717_equation_0 = const()[name = tensor<string, []>("aw_717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_717_cast_fp16 = einsum(equation = aw_717_equation_0, values = (var_4771_cast_fp16_18, var_4749_cast_fp16_18))[name = tensor<string, []>("aw_717_cast_fp16")];
+            tensor<string, []> aw_719_equation_0 = const()[name = tensor<string, []>("aw_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_719_cast_fp16 = einsum(equation = aw_719_equation_0, values = (var_4771_cast_fp16_19, var_4749_cast_fp16_19))[name = tensor<string, []>("aw_719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4853_cast_fp16 = softmax(axis = var_4697, x = aw_681_cast_fp16)[name = tensor<string, []>("op_4853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4854_cast_fp16 = softmax(axis = var_4697, x = aw_683_cast_fp16)[name = tensor<string, []>("op_4854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4855_cast_fp16 = softmax(axis = var_4697, x = aw_685_cast_fp16)[name = tensor<string, []>("op_4855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4856_cast_fp16 = softmax(axis = var_4697, x = aw_687_cast_fp16)[name = tensor<string, []>("op_4856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4857_cast_fp16 = softmax(axis = var_4697, x = aw_689_cast_fp16)[name = tensor<string, []>("op_4857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4858_cast_fp16 = softmax(axis = var_4697, x = aw_691_cast_fp16)[name = tensor<string, []>("op_4858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4859_cast_fp16 = softmax(axis = var_4697, x = aw_693_cast_fp16)[name = tensor<string, []>("op_4859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4860_cast_fp16 = softmax(axis = var_4697, x = aw_695_cast_fp16)[name = tensor<string, []>("op_4860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4861_cast_fp16 = softmax(axis = var_4697, x = aw_697_cast_fp16)[name = tensor<string, []>("op_4861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4862_cast_fp16 = softmax(axis = var_4697, x = aw_699_cast_fp16)[name = tensor<string, []>("op_4862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4863_cast_fp16 = softmax(axis = var_4697, x = aw_701_cast_fp16)[name = tensor<string, []>("op_4863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4864_cast_fp16 = softmax(axis = var_4697, x = aw_703_cast_fp16)[name = tensor<string, []>("op_4864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4865_cast_fp16 = softmax(axis = var_4697, x = aw_705_cast_fp16)[name = tensor<string, []>("op_4865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4866_cast_fp16 = softmax(axis = var_4697, x = aw_707_cast_fp16)[name = tensor<string, []>("op_4866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4867_cast_fp16 = softmax(axis = var_4697, x = aw_709_cast_fp16)[name = tensor<string, []>("op_4867_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4868_cast_fp16 = softmax(axis = var_4697, x = aw_711_cast_fp16)[name = tensor<string, []>("op_4868_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4869_cast_fp16 = softmax(axis = var_4697, x = aw_713_cast_fp16)[name = tensor<string, []>("op_4869_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4870_cast_fp16 = softmax(axis = var_4697, x = aw_715_cast_fp16)[name = tensor<string, []>("op_4870_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4871_cast_fp16 = softmax(axis = var_4697, x = aw_717_cast_fp16)[name = tensor<string, []>("op_4871_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4872_cast_fp16 = softmax(axis = var_4697, x = aw_719_cast_fp16)[name = tensor<string, []>("op_4872_cast_fp16")];
+            tensor<string, []> var_4874_equation_0 = const()[name = tensor<string, []>("op_4874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4874_cast_fp16 = einsum(equation = var_4874_equation_0, values = (var_4792_cast_fp16_0, var_4853_cast_fp16))[name = tensor<string, []>("op_4874_cast_fp16")];
+            tensor<string, []> var_4876_equation_0 = const()[name = tensor<string, []>("op_4876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4876_cast_fp16 = einsum(equation = var_4876_equation_0, values = (var_4792_cast_fp16_1, var_4854_cast_fp16))[name = tensor<string, []>("op_4876_cast_fp16")];
+            tensor<string, []> var_4878_equation_0 = const()[name = tensor<string, []>("op_4878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4878_cast_fp16 = einsum(equation = var_4878_equation_0, values = (var_4792_cast_fp16_2, var_4855_cast_fp16))[name = tensor<string, []>("op_4878_cast_fp16")];
+            tensor<string, []> var_4880_equation_0 = const()[name = tensor<string, []>("op_4880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4880_cast_fp16 = einsum(equation = var_4880_equation_0, values = (var_4792_cast_fp16_3, var_4856_cast_fp16))[name = tensor<string, []>("op_4880_cast_fp16")];
+            tensor<string, []> var_4882_equation_0 = const()[name = tensor<string, []>("op_4882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4882_cast_fp16 = einsum(equation = var_4882_equation_0, values = (var_4792_cast_fp16_4, var_4857_cast_fp16))[name = tensor<string, []>("op_4882_cast_fp16")];
+            tensor<string, []> var_4884_equation_0 = const()[name = tensor<string, []>("op_4884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4884_cast_fp16 = einsum(equation = var_4884_equation_0, values = (var_4792_cast_fp16_5, var_4858_cast_fp16))[name = tensor<string, []>("op_4884_cast_fp16")];
+            tensor<string, []> var_4886_equation_0 = const()[name = tensor<string, []>("op_4886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4886_cast_fp16 = einsum(equation = var_4886_equation_0, values = (var_4792_cast_fp16_6, var_4859_cast_fp16))[name = tensor<string, []>("op_4886_cast_fp16")];
+            tensor<string, []> var_4888_equation_0 = const()[name = tensor<string, []>("op_4888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4888_cast_fp16 = einsum(equation = var_4888_equation_0, values = (var_4792_cast_fp16_7, var_4860_cast_fp16))[name = tensor<string, []>("op_4888_cast_fp16")];
+            tensor<string, []> var_4890_equation_0 = const()[name = tensor<string, []>("op_4890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4890_cast_fp16 = einsum(equation = var_4890_equation_0, values = (var_4792_cast_fp16_8, var_4861_cast_fp16))[name = tensor<string, []>("op_4890_cast_fp16")];
+            tensor<string, []> var_4892_equation_0 = const()[name = tensor<string, []>("op_4892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4892_cast_fp16 = einsum(equation = var_4892_equation_0, values = (var_4792_cast_fp16_9, var_4862_cast_fp16))[name = tensor<string, []>("op_4892_cast_fp16")];
+            tensor<string, []> var_4894_equation_0 = const()[name = tensor<string, []>("op_4894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4894_cast_fp16 = einsum(equation = var_4894_equation_0, values = (var_4792_cast_fp16_10, var_4863_cast_fp16))[name = tensor<string, []>("op_4894_cast_fp16")];
+            tensor<string, []> var_4896_equation_0 = const()[name = tensor<string, []>("op_4896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4896_cast_fp16 = einsum(equation = var_4896_equation_0, values = (var_4792_cast_fp16_11, var_4864_cast_fp16))[name = tensor<string, []>("op_4896_cast_fp16")];
+            tensor<string, []> var_4898_equation_0 = const()[name = tensor<string, []>("op_4898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4898_cast_fp16 = einsum(equation = var_4898_equation_0, values = (var_4792_cast_fp16_12, var_4865_cast_fp16))[name = tensor<string, []>("op_4898_cast_fp16")];
+            tensor<string, []> var_4900_equation_0 = const()[name = tensor<string, []>("op_4900_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16 = einsum(equation = var_4900_equation_0, values = (var_4792_cast_fp16_13, var_4866_cast_fp16))[name = tensor<string, []>("op_4900_cast_fp16")];
+            tensor<string, []> var_4902_equation_0 = const()[name = tensor<string, []>("op_4902_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4902_cast_fp16 = einsum(equation = var_4902_equation_0, values = (var_4792_cast_fp16_14, var_4867_cast_fp16))[name = tensor<string, []>("op_4902_cast_fp16")];
+            tensor<string, []> var_4904_equation_0 = const()[name = tensor<string, []>("op_4904_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4904_cast_fp16 = einsum(equation = var_4904_equation_0, values = (var_4792_cast_fp16_15, var_4868_cast_fp16))[name = tensor<string, []>("op_4904_cast_fp16")];
+            tensor<string, []> var_4906_equation_0 = const()[name = tensor<string, []>("op_4906_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4906_cast_fp16 = einsum(equation = var_4906_equation_0, values = (var_4792_cast_fp16_16, var_4869_cast_fp16))[name = tensor<string, []>("op_4906_cast_fp16")];
+            tensor<string, []> var_4908_equation_0 = const()[name = tensor<string, []>("op_4908_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4908_cast_fp16 = einsum(equation = var_4908_equation_0, values = (var_4792_cast_fp16_17, var_4870_cast_fp16))[name = tensor<string, []>("op_4908_cast_fp16")];
+            tensor<string, []> var_4910_equation_0 = const()[name = tensor<string, []>("op_4910_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4910_cast_fp16 = einsum(equation = var_4910_equation_0, values = (var_4792_cast_fp16_18, var_4871_cast_fp16))[name = tensor<string, []>("op_4910_cast_fp16")];
+            tensor<string, []> var_4912_equation_0 = const()[name = tensor<string, []>("op_4912_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4912_cast_fp16 = einsum(equation = var_4912_equation_0, values = (var_4792_cast_fp16_19, var_4872_cast_fp16))[name = tensor<string, []>("op_4912_cast_fp16")];
+            tensor<bool, []> input_175_interleave_0 = const()[name = tensor<string, []>("input_175_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_175_cast_fp16 = concat(axis = var_4697, interleave = input_175_interleave_0, values = (var_4874_cast_fp16, var_4876_cast_fp16, var_4878_cast_fp16, var_4880_cast_fp16, var_4882_cast_fp16, var_4884_cast_fp16, var_4886_cast_fp16, var_4888_cast_fp16, var_4890_cast_fp16, var_4892_cast_fp16, var_4894_cast_fp16, var_4896_cast_fp16, var_4898_cast_fp16, var_4900_cast_fp16, var_4902_cast_fp16, var_4904_cast_fp16, var_4906_cast_fp16, var_4908_cast_fp16, var_4910_cast_fp16, var_4912_cast_fp16))[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<string, []> var_4921_pad_type_0 = const()[name = tensor<string, []>("op_4921_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4921_strides_0 = const()[name = tensor<string, []>("op_4921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4921_pad_0 = const()[name = tensor<string, []>("op_4921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4921_dilations_0 = const()[name = tensor<string, []>("op_4921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4921_groups_0 = const()[name = tensor<string, []>("op_4921_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693137152)))];
+            tensor<fp16, [1280]> blocks_17_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696414016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4921_cast_fp16 = conv(bias = blocks_17_attn_out_bias_to_fp16, dilations = var_4921_dilations_0, groups = var_4921_groups_0, pad = var_4921_pad_0, pad_type = var_4921_pad_type_0, strides = var_4921_strides_0, weight = blocks_17_attn_out_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("op_4921_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = var_4921_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_177_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_177_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696416640)))];
+            tensor<fp16, [1280]> input_177_beta_0_to_fp16 = const()[name = tensor<string, []>("input_177_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696419264)))];
+            tensor<fp16, []> var_4931_to_fp16 = const()[name = tensor<string, []>("op_4931_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = input_177_beta_0_to_fp16, epsilon = var_4931_to_fp16, gamma = input_177_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_17_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696421888)))];
+            tensor<fp16, [5120]> blocks_17_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709529152)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_179_cast_fp16 = conv(bias = blocks_17_mlp_0_bias_to_fp16, dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = blocks_17_mlp_0_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<string, []> var_4957_pad_type_0 = const()[name = tensor<string, []>("op_4957_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4957_strides_0 = const()[name = tensor<string, []>("op_4957_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4957_pad_0 = const()[name = tensor<string, []>("op_4957_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4957_dilations_0 = const()[name = tensor<string, []>("op_4957_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4957_groups_0 = const()[name = tensor<string, []>("op_4957_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_17_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709539456)))];
+            tensor<fp16, [1280]> blocks_17_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722646720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4957_cast_fp16 = conv(bias = blocks_17_mlp_2_bias_to_fp16, dilations = var_4957_dilations_0, groups = var_4957_groups_0, pad = var_4957_pad_0, pad_type = var_4957_pad_type_0, strides = var_4957_strides_0, weight = blocks_17_mlp_2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("op_4957_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_4957_cast_fp16)[name = tensor<string, []>("inputs_73_cast_fp16")];
+            tensor<int32, []> var_4966 = const()[name = tensor<string, []>("op_4966"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_183_axes_0 = const()[name = tensor<string, []>("input_183_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_183_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_183_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722649344)))];
+            tensor<fp16, [1280]> input_183_beta_0_to_fp16 = const()[name = tensor<string, []>("input_183_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722651968)))];
+            tensor<fp16, []> var_4982_to_fp16 = const()[name = tensor<string, []>("op_4982_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = input_183_beta_0_to_fp16, epsilon = var_4982_to_fp16, gamma = input_183_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<string, []> q_37_pad_type_0 = const()[name = tensor<string, []>("q_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_37_strides_0 = const()[name = tensor<string, []>("q_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_37_pad_0 = const()[name = tensor<string, []>("q_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_37_dilations_0 = const()[name = tensor<string, []>("q_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_37_groups_0 = const()[name = tensor<string, []>("q_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5017_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5017_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(722654592)))];
+            tensor<fp16, [1280]> var_5017_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5017_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(725931456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5017_cast_fp16 = conv(bias = var_5017_bias_0_to_fp16, dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = var_5017_weight_0_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<string, []> k_37_pad_type_0 = const()[name = tensor<string, []>("k_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_37_strides_0 = const()[name = tensor<string, []>("k_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_37_pad_0 = const()[name = tensor<string, []>("k_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_37_dilations_0 = const()[name = tensor<string, []>("k_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_37_groups_0 = const()[name = tensor<string, []>("k_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(725934080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_37_cast_fp16 = conv(dilations = k_37_dilations_0, groups = k_37_groups_0, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = k_37_strides_0, weight = blocks_18_attn_key_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
+            tensor<string, []> var_5015_pad_type_0 = const()[name = tensor<string, []>("op_5015_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5015_strides_0 = const()[name = tensor<string, []>("op_5015_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5015_pad_0 = const()[name = tensor<string, []>("op_5015_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5015_dilations_0 = const()[name = tensor<string, []>("op_5015_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5015_groups_0 = const()[name = tensor<string, []>("op_5015_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(729210944)))];
+            tensor<fp16, [1280]> blocks_18_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732487808)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5015_cast_fp16 = conv(bias = blocks_18_attn_value_bias_to_fp16, dilations = var_5015_dilations_0, groups = var_5015_groups_0, pad = var_5015_pad_0, pad_type = var_5015_pad_type_0, strides = var_5015_strides_0, weight = blocks_18_attn_value_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5015_cast_fp16")];
+            tensor<int32, [20]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5018_axis_0 = const()[name = tensor<string, []>("op_5018_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_19 = split(axis = var_5018_axis_0, split_sizes = tile_54, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5018_cast_fp16")];
+            tensor<int32, [4]> var_5039_perm_0 = const()[name = tensor<string, []>("op_5039_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_55 = const()[name = tensor<string, []>("tile_55"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5040_axis_0 = const()[name = tensor<string, []>("op_5040_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5039_cast_fp16 = transpose(perm = var_5039_perm_0, x = k_37_cast_fp16)[name = tensor<string, []>("transpose_14")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_19 = split(axis = var_5040_axis_0, split_sizes = tile_55, x = var_5039_cast_fp16)[name = tensor<string, []>("op_5040_cast_fp16")];
+            tensor<int32, [20]> tile_56 = const()[name = tensor<string, []>("tile_56"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5061_axis_0 = const()[name = tensor<string, []>("op_5061_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_19 = split(axis = var_5061_axis_0, split_sizes = tile_56, x = var_5015_cast_fp16)[name = tensor<string, []>("op_5061_cast_fp16")];
+            tensor<string, []> aw_721_equation_0 = const()[name = tensor<string, []>("aw_721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_721_cast_fp16 = einsum(equation = aw_721_equation_0, values = (var_5040_cast_fp16_0, var_5018_cast_fp16_0))[name = tensor<string, []>("aw_721_cast_fp16")];
+            tensor<string, []> aw_723_equation_0 = const()[name = tensor<string, []>("aw_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_723_cast_fp16 = einsum(equation = aw_723_equation_0, values = (var_5040_cast_fp16_1, var_5018_cast_fp16_1))[name = tensor<string, []>("aw_723_cast_fp16")];
+            tensor<string, []> aw_725_equation_0 = const()[name = tensor<string, []>("aw_725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_725_cast_fp16 = einsum(equation = aw_725_equation_0, values = (var_5040_cast_fp16_2, var_5018_cast_fp16_2))[name = tensor<string, []>("aw_725_cast_fp16")];
+            tensor<string, []> aw_727_equation_0 = const()[name = tensor<string, []>("aw_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_727_cast_fp16 = einsum(equation = aw_727_equation_0, values = (var_5040_cast_fp16_3, var_5018_cast_fp16_3))[name = tensor<string, []>("aw_727_cast_fp16")];
+            tensor<string, []> aw_729_equation_0 = const()[name = tensor<string, []>("aw_729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_729_cast_fp16 = einsum(equation = aw_729_equation_0, values = (var_5040_cast_fp16_4, var_5018_cast_fp16_4))[name = tensor<string, []>("aw_729_cast_fp16")];
+            tensor<string, []> aw_731_equation_0 = const()[name = tensor<string, []>("aw_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_731_cast_fp16 = einsum(equation = aw_731_equation_0, values = (var_5040_cast_fp16_5, var_5018_cast_fp16_5))[name = tensor<string, []>("aw_731_cast_fp16")];
+            tensor<string, []> aw_733_equation_0 = const()[name = tensor<string, []>("aw_733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_733_cast_fp16 = einsum(equation = aw_733_equation_0, values = (var_5040_cast_fp16_6, var_5018_cast_fp16_6))[name = tensor<string, []>("aw_733_cast_fp16")];
+            tensor<string, []> aw_735_equation_0 = const()[name = tensor<string, []>("aw_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_735_cast_fp16 = einsum(equation = aw_735_equation_0, values = (var_5040_cast_fp16_7, var_5018_cast_fp16_7))[name = tensor<string, []>("aw_735_cast_fp16")];
+            tensor<string, []> aw_737_equation_0 = const()[name = tensor<string, []>("aw_737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_737_cast_fp16 = einsum(equation = aw_737_equation_0, values = (var_5040_cast_fp16_8, var_5018_cast_fp16_8))[name = tensor<string, []>("aw_737_cast_fp16")];
+            tensor<string, []> aw_739_equation_0 = const()[name = tensor<string, []>("aw_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_739_cast_fp16 = einsum(equation = aw_739_equation_0, values = (var_5040_cast_fp16_9, var_5018_cast_fp16_9))[name = tensor<string, []>("aw_739_cast_fp16")];
+            tensor<string, []> aw_741_equation_0 = const()[name = tensor<string, []>("aw_741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_741_cast_fp16 = einsum(equation = aw_741_equation_0, values = (var_5040_cast_fp16_10, var_5018_cast_fp16_10))[name = tensor<string, []>("aw_741_cast_fp16")];
+            tensor<string, []> aw_743_equation_0 = const()[name = tensor<string, []>("aw_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_743_cast_fp16 = einsum(equation = aw_743_equation_0, values = (var_5040_cast_fp16_11, var_5018_cast_fp16_11))[name = tensor<string, []>("aw_743_cast_fp16")];
+            tensor<string, []> aw_745_equation_0 = const()[name = tensor<string, []>("aw_745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_745_cast_fp16 = einsum(equation = aw_745_equation_0, values = (var_5040_cast_fp16_12, var_5018_cast_fp16_12))[name = tensor<string, []>("aw_745_cast_fp16")];
+            tensor<string, []> aw_747_equation_0 = const()[name = tensor<string, []>("aw_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_747_cast_fp16 = einsum(equation = aw_747_equation_0, values = (var_5040_cast_fp16_13, var_5018_cast_fp16_13))[name = tensor<string, []>("aw_747_cast_fp16")];
+            tensor<string, []> aw_749_equation_0 = const()[name = tensor<string, []>("aw_749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_749_cast_fp16 = einsum(equation = aw_749_equation_0, values = (var_5040_cast_fp16_14, var_5018_cast_fp16_14))[name = tensor<string, []>("aw_749_cast_fp16")];
+            tensor<string, []> aw_751_equation_0 = const()[name = tensor<string, []>("aw_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_751_cast_fp16 = einsum(equation = aw_751_equation_0, values = (var_5040_cast_fp16_15, var_5018_cast_fp16_15))[name = tensor<string, []>("aw_751_cast_fp16")];
+            tensor<string, []> aw_753_equation_0 = const()[name = tensor<string, []>("aw_753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_753_cast_fp16 = einsum(equation = aw_753_equation_0, values = (var_5040_cast_fp16_16, var_5018_cast_fp16_16))[name = tensor<string, []>("aw_753_cast_fp16")];
+            tensor<string, []> aw_755_equation_0 = const()[name = tensor<string, []>("aw_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_755_cast_fp16 = einsum(equation = aw_755_equation_0, values = (var_5040_cast_fp16_17, var_5018_cast_fp16_17))[name = tensor<string, []>("aw_755_cast_fp16")];
+            tensor<string, []> aw_757_equation_0 = const()[name = tensor<string, []>("aw_757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_757_cast_fp16 = einsum(equation = aw_757_equation_0, values = (var_5040_cast_fp16_18, var_5018_cast_fp16_18))[name = tensor<string, []>("aw_757_cast_fp16")];
+            tensor<string, []> aw_759_equation_0 = const()[name = tensor<string, []>("aw_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_759_cast_fp16 = einsum(equation = aw_759_equation_0, values = (var_5040_cast_fp16_19, var_5018_cast_fp16_19))[name = tensor<string, []>("aw_759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5122_cast_fp16 = softmax(axis = var_4966, x = aw_721_cast_fp16)[name = tensor<string, []>("op_5122_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5123_cast_fp16 = softmax(axis = var_4966, x = aw_723_cast_fp16)[name = tensor<string, []>("op_5123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5124_cast_fp16 = softmax(axis = var_4966, x = aw_725_cast_fp16)[name = tensor<string, []>("op_5124_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5125_cast_fp16 = softmax(axis = var_4966, x = aw_727_cast_fp16)[name = tensor<string, []>("op_5125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5126_cast_fp16 = softmax(axis = var_4966, x = aw_729_cast_fp16)[name = tensor<string, []>("op_5126_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5127_cast_fp16 = softmax(axis = var_4966, x = aw_731_cast_fp16)[name = tensor<string, []>("op_5127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5128_cast_fp16 = softmax(axis = var_4966, x = aw_733_cast_fp16)[name = tensor<string, []>("op_5128_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5129_cast_fp16 = softmax(axis = var_4966, x = aw_735_cast_fp16)[name = tensor<string, []>("op_5129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5130_cast_fp16 = softmax(axis = var_4966, x = aw_737_cast_fp16)[name = tensor<string, []>("op_5130_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5131_cast_fp16 = softmax(axis = var_4966, x = aw_739_cast_fp16)[name = tensor<string, []>("op_5131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5132_cast_fp16 = softmax(axis = var_4966, x = aw_741_cast_fp16)[name = tensor<string, []>("op_5132_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5133_cast_fp16 = softmax(axis = var_4966, x = aw_743_cast_fp16)[name = tensor<string, []>("op_5133_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5134_cast_fp16 = softmax(axis = var_4966, x = aw_745_cast_fp16)[name = tensor<string, []>("op_5134_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5135_cast_fp16 = softmax(axis = var_4966, x = aw_747_cast_fp16)[name = tensor<string, []>("op_5135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5136_cast_fp16 = softmax(axis = var_4966, x = aw_749_cast_fp16)[name = tensor<string, []>("op_5136_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5137_cast_fp16 = softmax(axis = var_4966, x = aw_751_cast_fp16)[name = tensor<string, []>("op_5137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5138_cast_fp16 = softmax(axis = var_4966, x = aw_753_cast_fp16)[name = tensor<string, []>("op_5138_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5139_cast_fp16 = softmax(axis = var_4966, x = aw_755_cast_fp16)[name = tensor<string, []>("op_5139_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5140_cast_fp16 = softmax(axis = var_4966, x = aw_757_cast_fp16)[name = tensor<string, []>("op_5140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5141_cast_fp16 = softmax(axis = var_4966, x = aw_759_cast_fp16)[name = tensor<string, []>("op_5141_cast_fp16")];
+            tensor<string, []> var_5143_equation_0 = const()[name = tensor<string, []>("op_5143_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5143_cast_fp16 = einsum(equation = var_5143_equation_0, values = (var_5061_cast_fp16_0, var_5122_cast_fp16))[name = tensor<string, []>("op_5143_cast_fp16")];
+            tensor<string, []> var_5145_equation_0 = const()[name = tensor<string, []>("op_5145_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5145_cast_fp16 = einsum(equation = var_5145_equation_0, values = (var_5061_cast_fp16_1, var_5123_cast_fp16))[name = tensor<string, []>("op_5145_cast_fp16")];
+            tensor<string, []> var_5147_equation_0 = const()[name = tensor<string, []>("op_5147_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5147_cast_fp16 = einsum(equation = var_5147_equation_0, values = (var_5061_cast_fp16_2, var_5124_cast_fp16))[name = tensor<string, []>("op_5147_cast_fp16")];
+            tensor<string, []> var_5149_equation_0 = const()[name = tensor<string, []>("op_5149_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5149_cast_fp16 = einsum(equation = var_5149_equation_0, values = (var_5061_cast_fp16_3, var_5125_cast_fp16))[name = tensor<string, []>("op_5149_cast_fp16")];
+            tensor<string, []> var_5151_equation_0 = const()[name = tensor<string, []>("op_5151_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5151_cast_fp16 = einsum(equation = var_5151_equation_0, values = (var_5061_cast_fp16_4, var_5126_cast_fp16))[name = tensor<string, []>("op_5151_cast_fp16")];
+            tensor<string, []> var_5153_equation_0 = const()[name = tensor<string, []>("op_5153_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5153_cast_fp16 = einsum(equation = var_5153_equation_0, values = (var_5061_cast_fp16_5, var_5127_cast_fp16))[name = tensor<string, []>("op_5153_cast_fp16")];
+            tensor<string, []> var_5155_equation_0 = const()[name = tensor<string, []>("op_5155_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5155_cast_fp16 = einsum(equation = var_5155_equation_0, values = (var_5061_cast_fp16_6, var_5128_cast_fp16))[name = tensor<string, []>("op_5155_cast_fp16")];
+            tensor<string, []> var_5157_equation_0 = const()[name = tensor<string, []>("op_5157_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5157_cast_fp16 = einsum(equation = var_5157_equation_0, values = (var_5061_cast_fp16_7, var_5129_cast_fp16))[name = tensor<string, []>("op_5157_cast_fp16")];
+            tensor<string, []> var_5159_equation_0 = const()[name = tensor<string, []>("op_5159_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5159_cast_fp16 = einsum(equation = var_5159_equation_0, values = (var_5061_cast_fp16_8, var_5130_cast_fp16))[name = tensor<string, []>("op_5159_cast_fp16")];
+            tensor<string, []> var_5161_equation_0 = const()[name = tensor<string, []>("op_5161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5161_cast_fp16 = einsum(equation = var_5161_equation_0, values = (var_5061_cast_fp16_9, var_5131_cast_fp16))[name = tensor<string, []>("op_5161_cast_fp16")];
+            tensor<string, []> var_5163_equation_0 = const()[name = tensor<string, []>("op_5163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5163_cast_fp16 = einsum(equation = var_5163_equation_0, values = (var_5061_cast_fp16_10, var_5132_cast_fp16))[name = tensor<string, []>("op_5163_cast_fp16")];
+            tensor<string, []> var_5165_equation_0 = const()[name = tensor<string, []>("op_5165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5165_cast_fp16 = einsum(equation = var_5165_equation_0, values = (var_5061_cast_fp16_11, var_5133_cast_fp16))[name = tensor<string, []>("op_5165_cast_fp16")];
+            tensor<string, []> var_5167_equation_0 = const()[name = tensor<string, []>("op_5167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5167_cast_fp16 = einsum(equation = var_5167_equation_0, values = (var_5061_cast_fp16_12, var_5134_cast_fp16))[name = tensor<string, []>("op_5167_cast_fp16")];
+            tensor<string, []> var_5169_equation_0 = const()[name = tensor<string, []>("op_5169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5169_cast_fp16 = einsum(equation = var_5169_equation_0, values = (var_5061_cast_fp16_13, var_5135_cast_fp16))[name = tensor<string, []>("op_5169_cast_fp16")];
+            tensor<string, []> var_5171_equation_0 = const()[name = tensor<string, []>("op_5171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5171_cast_fp16 = einsum(equation = var_5171_equation_0, values = (var_5061_cast_fp16_14, var_5136_cast_fp16))[name = tensor<string, []>("op_5171_cast_fp16")];
+            tensor<string, []> var_5173_equation_0 = const()[name = tensor<string, []>("op_5173_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5173_cast_fp16 = einsum(equation = var_5173_equation_0, values = (var_5061_cast_fp16_15, var_5137_cast_fp16))[name = tensor<string, []>("op_5173_cast_fp16")];
+            tensor<string, []> var_5175_equation_0 = const()[name = tensor<string, []>("op_5175_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5175_cast_fp16 = einsum(equation = var_5175_equation_0, values = (var_5061_cast_fp16_16, var_5138_cast_fp16))[name = tensor<string, []>("op_5175_cast_fp16")];
+            tensor<string, []> var_5177_equation_0 = const()[name = tensor<string, []>("op_5177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5177_cast_fp16 = einsum(equation = var_5177_equation_0, values = (var_5061_cast_fp16_17, var_5139_cast_fp16))[name = tensor<string, []>("op_5177_cast_fp16")];
+            tensor<string, []> var_5179_equation_0 = const()[name = tensor<string, []>("op_5179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5179_cast_fp16 = einsum(equation = var_5179_equation_0, values = (var_5061_cast_fp16_18, var_5140_cast_fp16))[name = tensor<string, []>("op_5179_cast_fp16")];
+            tensor<string, []> var_5181_equation_0 = const()[name = tensor<string, []>("op_5181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5181_cast_fp16 = einsum(equation = var_5181_equation_0, values = (var_5061_cast_fp16_19, var_5141_cast_fp16))[name = tensor<string, []>("op_5181_cast_fp16")];
+            tensor<bool, []> input_185_interleave_0 = const()[name = tensor<string, []>("input_185_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_185_cast_fp16 = concat(axis = var_4966, interleave = input_185_interleave_0, values = (var_5143_cast_fp16, var_5145_cast_fp16, var_5147_cast_fp16, var_5149_cast_fp16, var_5151_cast_fp16, var_5153_cast_fp16, var_5155_cast_fp16, var_5157_cast_fp16, var_5159_cast_fp16, var_5161_cast_fp16, var_5163_cast_fp16, var_5165_cast_fp16, var_5167_cast_fp16, var_5169_cast_fp16, var_5171_cast_fp16, var_5173_cast_fp16, var_5175_cast_fp16, var_5177_cast_fp16, var_5179_cast_fp16, var_5181_cast_fp16))[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<string, []> var_5190_pad_type_0 = const()[name = tensor<string, []>("op_5190_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5190_strides_0 = const()[name = tensor<string, []>("op_5190_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5190_pad_0 = const()[name = tensor<string, []>("op_5190_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5190_dilations_0 = const()[name = tensor<string, []>("op_5190_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5190_groups_0 = const()[name = tensor<string, []>("op_5190_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732490432)))];
+            tensor<fp16, [1280]> blocks_18_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735767296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5190_cast_fp16 = conv(bias = blocks_18_attn_out_bias_to_fp16, dilations = var_5190_dilations_0, groups = var_5190_groups_0, pad = var_5190_pad_0, pad_type = var_5190_pad_type_0, strides = var_5190_strides_0, weight = blocks_18_attn_out_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("op_5190_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = var_5190_cast_fp16)[name = tensor<string, []>("inputs_75_cast_fp16")];
+            tensor<int32, [1]> input_187_axes_0 = const()[name = tensor<string, []>("input_187_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_187_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735769920)))];
+            tensor<fp16, [1280]> input_187_beta_0_to_fp16 = const()[name = tensor<string, []>("input_187_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735772544)))];
+            tensor<fp16, []> var_5200_to_fp16 = const()[name = tensor<string, []>("op_5200_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_187_cast_fp16 = layer_norm(axes = input_187_axes_0, beta = input_187_beta_0_to_fp16, epsilon = var_5200_to_fp16, gamma = input_187_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
+            tensor<string, []> input_189_pad_type_0 = const()[name = tensor<string, []>("input_189_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_189_strides_0 = const()[name = tensor<string, []>("input_189_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_189_pad_0 = const()[name = tensor<string, []>("input_189_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_189_dilations_0 = const()[name = tensor<string, []>("input_189_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_189_groups_0 = const()[name = tensor<string, []>("input_189_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_18_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(735775168)))];
+            tensor<fp16, [5120]> blocks_18_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(748882432)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_189_cast_fp16 = conv(bias = blocks_18_mlp_0_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = blocks_18_mlp_0_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
+            tensor<string, []> var_5226_pad_type_0 = const()[name = tensor<string, []>("op_5226_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5226_strides_0 = const()[name = tensor<string, []>("op_5226_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5226_pad_0 = const()[name = tensor<string, []>("op_5226_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5226_dilations_0 = const()[name = tensor<string, []>("op_5226_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5226_groups_0 = const()[name = tensor<string, []>("op_5226_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_18_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(748892736)))];
+            tensor<fp16, [1280]> blocks_18_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762000000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5226_cast_fp16 = conv(bias = blocks_18_mlp_2_bias_to_fp16, dilations = var_5226_dilations_0, groups = var_5226_groups_0, pad = var_5226_pad_0, pad_type = var_5226_pad_type_0, strides = var_5226_strides_0, weight = blocks_18_mlp_2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("op_5226_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = var_5226_cast_fp16)[name = tensor<string, []>("inputs_77_cast_fp16")];
+            tensor<int32, []> var_5235 = const()[name = tensor<string, []>("op_5235"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_193_axes_0 = const()[name = tensor<string, []>("input_193_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_193_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_193_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762002624)))];
+            tensor<fp16, [1280]> input_193_beta_0_to_fp16 = const()[name = tensor<string, []>("input_193_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762005248)))];
+            tensor<fp16, []> var_5251_to_fp16 = const()[name = tensor<string, []>("op_5251_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_193_cast_fp16 = layer_norm(axes = input_193_axes_0, beta = input_193_beta_0_to_fp16, epsilon = var_5251_to_fp16, gamma = input_193_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
+            tensor<string, []> q_39_pad_type_0 = const()[name = tensor<string, []>("q_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_39_strides_0 = const()[name = tensor<string, []>("q_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_39_pad_0 = const()[name = tensor<string, []>("q_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_39_dilations_0 = const()[name = tensor<string, []>("q_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_39_groups_0 = const()[name = tensor<string, []>("q_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5286_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5286_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762007872)))];
+            tensor<fp16, [1280]> var_5286_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5286_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765284736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5286_cast_fp16 = conv(bias = var_5286_bias_0_to_fp16, dilations = q_39_dilations_0, groups = q_39_groups_0, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = q_39_strides_0, weight = var_5286_weight_0_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5286_cast_fp16")];
+            tensor<string, []> k_39_pad_type_0 = const()[name = tensor<string, []>("k_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_39_strides_0 = const()[name = tensor<string, []>("k_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_39_pad_0 = const()[name = tensor<string, []>("k_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_39_dilations_0 = const()[name = tensor<string, []>("k_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_39_groups_0 = const()[name = tensor<string, []>("k_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765287360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_39_cast_fp16 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = blocks_19_attn_key_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("k_39_cast_fp16")];
+            tensor<string, []> var_5284_pad_type_0 = const()[name = tensor<string, []>("op_5284_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5284_strides_0 = const()[name = tensor<string, []>("op_5284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5284_pad_0 = const()[name = tensor<string, []>("op_5284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5284_dilations_0 = const()[name = tensor<string, []>("op_5284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5284_groups_0 = const()[name = tensor<string, []>("op_5284_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(768564224)))];
+            tensor<fp16, [1280]> blocks_19_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(771841088)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5284_cast_fp16 = conv(bias = blocks_19_attn_value_bias_to_fp16, dilations = var_5284_dilations_0, groups = var_5284_groups_0, pad = var_5284_pad_0, pad_type = var_5284_pad_type_0, strides = var_5284_strides_0, weight = blocks_19_attn_value_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5284_cast_fp16")];
+            tensor<int32, [20]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5287_axis_0 = const()[name = tensor<string, []>("op_5287_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_19 = split(axis = var_5287_axis_0, split_sizes = tile_57, x = var_5286_cast_fp16)[name = tensor<string, []>("op_5287_cast_fp16")];
+            tensor<int32, [4]> var_5308_perm_0 = const()[name = tensor<string, []>("op_5308_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5309_axis_0 = const()[name = tensor<string, []>("op_5309_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5308_cast_fp16 = transpose(perm = var_5308_perm_0, x = k_39_cast_fp16)[name = tensor<string, []>("transpose_13")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_19 = split(axis = var_5309_axis_0, split_sizes = tile_58, x = var_5308_cast_fp16)[name = tensor<string, []>("op_5309_cast_fp16")];
+            tensor<int32, [20]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5330_axis_0 = const()[name = tensor<string, []>("op_5330_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_19 = split(axis = var_5330_axis_0, split_sizes = tile_59, x = var_5284_cast_fp16)[name = tensor<string, []>("op_5330_cast_fp16")];
+            tensor<string, []> aw_761_equation_0 = const()[name = tensor<string, []>("aw_761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_761_cast_fp16 = einsum(equation = aw_761_equation_0, values = (var_5309_cast_fp16_0, var_5287_cast_fp16_0))[name = tensor<string, []>("aw_761_cast_fp16")];
+            tensor<string, []> aw_763_equation_0 = const()[name = tensor<string, []>("aw_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_763_cast_fp16 = einsum(equation = aw_763_equation_0, values = (var_5309_cast_fp16_1, var_5287_cast_fp16_1))[name = tensor<string, []>("aw_763_cast_fp16")];
+            tensor<string, []> aw_765_equation_0 = const()[name = tensor<string, []>("aw_765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_765_cast_fp16 = einsum(equation = aw_765_equation_0, values = (var_5309_cast_fp16_2, var_5287_cast_fp16_2))[name = tensor<string, []>("aw_765_cast_fp16")];
+            tensor<string, []> aw_767_equation_0 = const()[name = tensor<string, []>("aw_767_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_767_cast_fp16 = einsum(equation = aw_767_equation_0, values = (var_5309_cast_fp16_3, var_5287_cast_fp16_3))[name = tensor<string, []>("aw_767_cast_fp16")];
+            tensor<string, []> aw_769_equation_0 = const()[name = tensor<string, []>("aw_769_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_769_cast_fp16 = einsum(equation = aw_769_equation_0, values = (var_5309_cast_fp16_4, var_5287_cast_fp16_4))[name = tensor<string, []>("aw_769_cast_fp16")];
+            tensor<string, []> aw_771_equation_0 = const()[name = tensor<string, []>("aw_771_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_771_cast_fp16 = einsum(equation = aw_771_equation_0, values = (var_5309_cast_fp16_5, var_5287_cast_fp16_5))[name = tensor<string, []>("aw_771_cast_fp16")];
+            tensor<string, []> aw_773_equation_0 = const()[name = tensor<string, []>("aw_773_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_773_cast_fp16 = einsum(equation = aw_773_equation_0, values = (var_5309_cast_fp16_6, var_5287_cast_fp16_6))[name = tensor<string, []>("aw_773_cast_fp16")];
+            tensor<string, []> aw_775_equation_0 = const()[name = tensor<string, []>("aw_775_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_775_cast_fp16 = einsum(equation = aw_775_equation_0, values = (var_5309_cast_fp16_7, var_5287_cast_fp16_7))[name = tensor<string, []>("aw_775_cast_fp16")];
+            tensor<string, []> aw_777_equation_0 = const()[name = tensor<string, []>("aw_777_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_777_cast_fp16 = einsum(equation = aw_777_equation_0, values = (var_5309_cast_fp16_8, var_5287_cast_fp16_8))[name = tensor<string, []>("aw_777_cast_fp16")];
+            tensor<string, []> aw_779_equation_0 = const()[name = tensor<string, []>("aw_779_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_779_cast_fp16 = einsum(equation = aw_779_equation_0, values = (var_5309_cast_fp16_9, var_5287_cast_fp16_9))[name = tensor<string, []>("aw_779_cast_fp16")];
+            tensor<string, []> aw_781_equation_0 = const()[name = tensor<string, []>("aw_781_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_781_cast_fp16 = einsum(equation = aw_781_equation_0, values = (var_5309_cast_fp16_10, var_5287_cast_fp16_10))[name = tensor<string, []>("aw_781_cast_fp16")];
+            tensor<string, []> aw_783_equation_0 = const()[name = tensor<string, []>("aw_783_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_783_cast_fp16 = einsum(equation = aw_783_equation_0, values = (var_5309_cast_fp16_11, var_5287_cast_fp16_11))[name = tensor<string, []>("aw_783_cast_fp16")];
+            tensor<string, []> aw_785_equation_0 = const()[name = tensor<string, []>("aw_785_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_785_cast_fp16 = einsum(equation = aw_785_equation_0, values = (var_5309_cast_fp16_12, var_5287_cast_fp16_12))[name = tensor<string, []>("aw_785_cast_fp16")];
+            tensor<string, []> aw_787_equation_0 = const()[name = tensor<string, []>("aw_787_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_787_cast_fp16 = einsum(equation = aw_787_equation_0, values = (var_5309_cast_fp16_13, var_5287_cast_fp16_13))[name = tensor<string, []>("aw_787_cast_fp16")];
+            tensor<string, []> aw_789_equation_0 = const()[name = tensor<string, []>("aw_789_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_789_cast_fp16 = einsum(equation = aw_789_equation_0, values = (var_5309_cast_fp16_14, var_5287_cast_fp16_14))[name = tensor<string, []>("aw_789_cast_fp16")];
+            tensor<string, []> aw_791_equation_0 = const()[name = tensor<string, []>("aw_791_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_791_cast_fp16 = einsum(equation = aw_791_equation_0, values = (var_5309_cast_fp16_15, var_5287_cast_fp16_15))[name = tensor<string, []>("aw_791_cast_fp16")];
+            tensor<string, []> aw_793_equation_0 = const()[name = tensor<string, []>("aw_793_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_793_cast_fp16 = einsum(equation = aw_793_equation_0, values = (var_5309_cast_fp16_16, var_5287_cast_fp16_16))[name = tensor<string, []>("aw_793_cast_fp16")];
+            tensor<string, []> aw_795_equation_0 = const()[name = tensor<string, []>("aw_795_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_795_cast_fp16 = einsum(equation = aw_795_equation_0, values = (var_5309_cast_fp16_17, var_5287_cast_fp16_17))[name = tensor<string, []>("aw_795_cast_fp16")];
+            tensor<string, []> aw_797_equation_0 = const()[name = tensor<string, []>("aw_797_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_797_cast_fp16 = einsum(equation = aw_797_equation_0, values = (var_5309_cast_fp16_18, var_5287_cast_fp16_18))[name = tensor<string, []>("aw_797_cast_fp16")];
+            tensor<string, []> aw_799_equation_0 = const()[name = tensor<string, []>("aw_799_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_799_cast_fp16 = einsum(equation = aw_799_equation_0, values = (var_5309_cast_fp16_19, var_5287_cast_fp16_19))[name = tensor<string, []>("aw_799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5391_cast_fp16 = softmax(axis = var_5235, x = aw_761_cast_fp16)[name = tensor<string, []>("op_5391_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5392_cast_fp16 = softmax(axis = var_5235, x = aw_763_cast_fp16)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5393_cast_fp16 = softmax(axis = var_5235, x = aw_765_cast_fp16)[name = tensor<string, []>("op_5393_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5394_cast_fp16 = softmax(axis = var_5235, x = aw_767_cast_fp16)[name = tensor<string, []>("op_5394_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5395_cast_fp16 = softmax(axis = var_5235, x = aw_769_cast_fp16)[name = tensor<string, []>("op_5395_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5396_cast_fp16 = softmax(axis = var_5235, x = aw_771_cast_fp16)[name = tensor<string, []>("op_5396_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5397_cast_fp16 = softmax(axis = var_5235, x = aw_773_cast_fp16)[name = tensor<string, []>("op_5397_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5398_cast_fp16 = softmax(axis = var_5235, x = aw_775_cast_fp16)[name = tensor<string, []>("op_5398_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5399_cast_fp16 = softmax(axis = var_5235, x = aw_777_cast_fp16)[name = tensor<string, []>("op_5399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5400_cast_fp16 = softmax(axis = var_5235, x = aw_779_cast_fp16)[name = tensor<string, []>("op_5400_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5401_cast_fp16 = softmax(axis = var_5235, x = aw_781_cast_fp16)[name = tensor<string, []>("op_5401_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5402_cast_fp16 = softmax(axis = var_5235, x = aw_783_cast_fp16)[name = tensor<string, []>("op_5402_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5403_cast_fp16 = softmax(axis = var_5235, x = aw_785_cast_fp16)[name = tensor<string, []>("op_5403_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5404_cast_fp16 = softmax(axis = var_5235, x = aw_787_cast_fp16)[name = tensor<string, []>("op_5404_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5405_cast_fp16 = softmax(axis = var_5235, x = aw_789_cast_fp16)[name = tensor<string, []>("op_5405_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5406_cast_fp16 = softmax(axis = var_5235, x = aw_791_cast_fp16)[name = tensor<string, []>("op_5406_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5407_cast_fp16 = softmax(axis = var_5235, x = aw_793_cast_fp16)[name = tensor<string, []>("op_5407_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5408_cast_fp16 = softmax(axis = var_5235, x = aw_795_cast_fp16)[name = tensor<string, []>("op_5408_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5409_cast_fp16 = softmax(axis = var_5235, x = aw_797_cast_fp16)[name = tensor<string, []>("op_5409_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5410_cast_fp16 = softmax(axis = var_5235, x = aw_799_cast_fp16)[name = tensor<string, []>("op_5410_cast_fp16")];
+            tensor<string, []> var_5412_equation_0 = const()[name = tensor<string, []>("op_5412_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5412_cast_fp16 = einsum(equation = var_5412_equation_0, values = (var_5330_cast_fp16_0, var_5391_cast_fp16))[name = tensor<string, []>("op_5412_cast_fp16")];
+            tensor<string, []> var_5414_equation_0 = const()[name = tensor<string, []>("op_5414_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5414_cast_fp16 = einsum(equation = var_5414_equation_0, values = (var_5330_cast_fp16_1, var_5392_cast_fp16))[name = tensor<string, []>("op_5414_cast_fp16")];
+            tensor<string, []> var_5416_equation_0 = const()[name = tensor<string, []>("op_5416_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5416_cast_fp16 = einsum(equation = var_5416_equation_0, values = (var_5330_cast_fp16_2, var_5393_cast_fp16))[name = tensor<string, []>("op_5416_cast_fp16")];
+            tensor<string, []> var_5418_equation_0 = const()[name = tensor<string, []>("op_5418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5418_cast_fp16 = einsum(equation = var_5418_equation_0, values = (var_5330_cast_fp16_3, var_5394_cast_fp16))[name = tensor<string, []>("op_5418_cast_fp16")];
+            tensor<string, []> var_5420_equation_0 = const()[name = tensor<string, []>("op_5420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5420_cast_fp16 = einsum(equation = var_5420_equation_0, values = (var_5330_cast_fp16_4, var_5395_cast_fp16))[name = tensor<string, []>("op_5420_cast_fp16")];
+            tensor<string, []> var_5422_equation_0 = const()[name = tensor<string, []>("op_5422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5422_cast_fp16 = einsum(equation = var_5422_equation_0, values = (var_5330_cast_fp16_5, var_5396_cast_fp16))[name = tensor<string, []>("op_5422_cast_fp16")];
+            tensor<string, []> var_5424_equation_0 = const()[name = tensor<string, []>("op_5424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5424_cast_fp16 = einsum(equation = var_5424_equation_0, values = (var_5330_cast_fp16_6, var_5397_cast_fp16))[name = tensor<string, []>("op_5424_cast_fp16")];
+            tensor<string, []> var_5426_equation_0 = const()[name = tensor<string, []>("op_5426_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5426_cast_fp16 = einsum(equation = var_5426_equation_0, values = (var_5330_cast_fp16_7, var_5398_cast_fp16))[name = tensor<string, []>("op_5426_cast_fp16")];
+            tensor<string, []> var_5428_equation_0 = const()[name = tensor<string, []>("op_5428_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5428_cast_fp16 = einsum(equation = var_5428_equation_0, values = (var_5330_cast_fp16_8, var_5399_cast_fp16))[name = tensor<string, []>("op_5428_cast_fp16")];
+            tensor<string, []> var_5430_equation_0 = const()[name = tensor<string, []>("op_5430_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5430_cast_fp16 = einsum(equation = var_5430_equation_0, values = (var_5330_cast_fp16_9, var_5400_cast_fp16))[name = tensor<string, []>("op_5430_cast_fp16")];
+            tensor<string, []> var_5432_equation_0 = const()[name = tensor<string, []>("op_5432_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5432_cast_fp16 = einsum(equation = var_5432_equation_0, values = (var_5330_cast_fp16_10, var_5401_cast_fp16))[name = tensor<string, []>("op_5432_cast_fp16")];
+            tensor<string, []> var_5434_equation_0 = const()[name = tensor<string, []>("op_5434_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5434_cast_fp16 = einsum(equation = var_5434_equation_0, values = (var_5330_cast_fp16_11, var_5402_cast_fp16))[name = tensor<string, []>("op_5434_cast_fp16")];
+            tensor<string, []> var_5436_equation_0 = const()[name = tensor<string, []>("op_5436_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5436_cast_fp16 = einsum(equation = var_5436_equation_0, values = (var_5330_cast_fp16_12, var_5403_cast_fp16))[name = tensor<string, []>("op_5436_cast_fp16")];
+            tensor<string, []> var_5438_equation_0 = const()[name = tensor<string, []>("op_5438_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5438_cast_fp16 = einsum(equation = var_5438_equation_0, values = (var_5330_cast_fp16_13, var_5404_cast_fp16))[name = tensor<string, []>("op_5438_cast_fp16")];
+            tensor<string, []> var_5440_equation_0 = const()[name = tensor<string, []>("op_5440_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5440_cast_fp16 = einsum(equation = var_5440_equation_0, values = (var_5330_cast_fp16_14, var_5405_cast_fp16))[name = tensor<string, []>("op_5440_cast_fp16")];
+            tensor<string, []> var_5442_equation_0 = const()[name = tensor<string, []>("op_5442_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5442_cast_fp16 = einsum(equation = var_5442_equation_0, values = (var_5330_cast_fp16_15, var_5406_cast_fp16))[name = tensor<string, []>("op_5442_cast_fp16")];
+            tensor<string, []> var_5444_equation_0 = const()[name = tensor<string, []>("op_5444_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5444_cast_fp16 = einsum(equation = var_5444_equation_0, values = (var_5330_cast_fp16_16, var_5407_cast_fp16))[name = tensor<string, []>("op_5444_cast_fp16")];
+            tensor<string, []> var_5446_equation_0 = const()[name = tensor<string, []>("op_5446_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5446_cast_fp16 = einsum(equation = var_5446_equation_0, values = (var_5330_cast_fp16_17, var_5408_cast_fp16))[name = tensor<string, []>("op_5446_cast_fp16")];
+            tensor<string, []> var_5448_equation_0 = const()[name = tensor<string, []>("op_5448_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5448_cast_fp16 = einsum(equation = var_5448_equation_0, values = (var_5330_cast_fp16_18, var_5409_cast_fp16))[name = tensor<string, []>("op_5448_cast_fp16")];
+            tensor<string, []> var_5450_equation_0 = const()[name = tensor<string, []>("op_5450_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5450_cast_fp16 = einsum(equation = var_5450_equation_0, values = (var_5330_cast_fp16_19, var_5410_cast_fp16))[name = tensor<string, []>("op_5450_cast_fp16")];
+            tensor<bool, []> input_195_interleave_0 = const()[name = tensor<string, []>("input_195_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_195_cast_fp16 = concat(axis = var_5235, interleave = input_195_interleave_0, values = (var_5412_cast_fp16, var_5414_cast_fp16, var_5416_cast_fp16, var_5418_cast_fp16, var_5420_cast_fp16, var_5422_cast_fp16, var_5424_cast_fp16, var_5426_cast_fp16, var_5428_cast_fp16, var_5430_cast_fp16, var_5432_cast_fp16, var_5434_cast_fp16, var_5436_cast_fp16, var_5438_cast_fp16, var_5440_cast_fp16, var_5442_cast_fp16, var_5444_cast_fp16, var_5446_cast_fp16, var_5448_cast_fp16, var_5450_cast_fp16))[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<string, []> var_5459_pad_type_0 = const()[name = tensor<string, []>("op_5459_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5459_strides_0 = const()[name = tensor<string, []>("op_5459_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5459_pad_0 = const()[name = tensor<string, []>("op_5459_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5459_dilations_0 = const()[name = tensor<string, []>("op_5459_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5459_groups_0 = const()[name = tensor<string, []>("op_5459_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(771843712)))];
+            tensor<fp16, [1280]> blocks_19_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775120576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5459_cast_fp16 = conv(bias = blocks_19_attn_out_bias_to_fp16, dilations = var_5459_dilations_0, groups = var_5459_groups_0, pad = var_5459_pad_0, pad_type = var_5459_pad_type_0, strides = var_5459_strides_0, weight = blocks_19_attn_out_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("op_5459_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = var_5459_cast_fp16)[name = tensor<string, []>("inputs_79_cast_fp16")];
+            tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_197_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_197_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775123200)))];
+            tensor<fp16, [1280]> input_197_beta_0_to_fp16 = const()[name = tensor<string, []>("input_197_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775125824)))];
+            tensor<fp16, []> var_5469_to_fp16 = const()[name = tensor<string, []>("op_5469_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = input_197_beta_0_to_fp16, epsilon = var_5469_to_fp16, gamma = input_197_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_19_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775128448)))];
+            tensor<fp16, [5120]> blocks_19_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788235712)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_199_cast_fp16 = conv(bias = blocks_19_mlp_0_bias_to_fp16, dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = blocks_19_mlp_0_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<string, []> var_5495_pad_type_0 = const()[name = tensor<string, []>("op_5495_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5495_strides_0 = const()[name = tensor<string, []>("op_5495_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5495_pad_0 = const()[name = tensor<string, []>("op_5495_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5495_dilations_0 = const()[name = tensor<string, []>("op_5495_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5495_groups_0 = const()[name = tensor<string, []>("op_5495_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_19_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788246016)))];
+            tensor<fp16, [1280]> blocks_19_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801353280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5495_cast_fp16 = conv(bias = blocks_19_mlp_2_bias_to_fp16, dilations = var_5495_dilations_0, groups = var_5495_groups_0, pad = var_5495_pad_0, pad_type = var_5495_pad_type_0, strides = var_5495_strides_0, weight = blocks_19_mlp_2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("op_5495_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_5495_cast_fp16)[name = tensor<string, []>("inputs_81_cast_fp16")];
+            tensor<int32, []> var_5504 = const()[name = tensor<string, []>("op_5504"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_203_axes_0 = const()[name = tensor<string, []>("input_203_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_203_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801355904)))];
+            tensor<fp16, [1280]> input_203_beta_0_to_fp16 = const()[name = tensor<string, []>("input_203_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801358528)))];
+            tensor<fp16, []> var_5520_to_fp16 = const()[name = tensor<string, []>("op_5520_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_203_cast_fp16 = layer_norm(axes = input_203_axes_0, beta = input_203_beta_0_to_fp16, epsilon = var_5520_to_fp16, gamma = input_203_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<string, []> q_41_pad_type_0 = const()[name = tensor<string, []>("q_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_41_strides_0 = const()[name = tensor<string, []>("q_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_41_pad_0 = const()[name = tensor<string, []>("q_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_41_dilations_0 = const()[name = tensor<string, []>("q_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_41_groups_0 = const()[name = tensor<string, []>("q_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5555_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5555_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801361152)))];
+            tensor<fp16, [1280]> var_5555_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5555_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(804638016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5555_cast_fp16 = conv(bias = var_5555_bias_0_to_fp16, dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = var_5555_weight_0_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5555_cast_fp16")];
+            tensor<string, []> k_41_pad_type_0 = const()[name = tensor<string, []>("k_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_41_strides_0 = const()[name = tensor<string, []>("k_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_41_pad_0 = const()[name = tensor<string, []>("k_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_41_dilations_0 = const()[name = tensor<string, []>("k_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_41_groups_0 = const()[name = tensor<string, []>("k_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(804640640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_41_cast_fp16 = conv(dilations = k_41_dilations_0, groups = k_41_groups_0, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = k_41_strides_0, weight = blocks_20_attn_key_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
+            tensor<string, []> var_5553_pad_type_0 = const()[name = tensor<string, []>("op_5553_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5553_strides_0 = const()[name = tensor<string, []>("op_5553_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5553_pad_0 = const()[name = tensor<string, []>("op_5553_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5553_dilations_0 = const()[name = tensor<string, []>("op_5553_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5553_groups_0 = const()[name = tensor<string, []>("op_5553_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(807917504)))];
+            tensor<fp16, [1280]> blocks_20_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811194368)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5553_cast_fp16 = conv(bias = blocks_20_attn_value_bias_to_fp16, dilations = var_5553_dilations_0, groups = var_5553_groups_0, pad = var_5553_pad_0, pad_type = var_5553_pad_type_0, strides = var_5553_strides_0, weight = blocks_20_attn_value_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5553_cast_fp16")];
+            tensor<int32, [20]> tile_60 = const()[name = tensor<string, []>("tile_60"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5556_axis_0 = const()[name = tensor<string, []>("op_5556_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_19 = split(axis = var_5556_axis_0, split_sizes = tile_60, x = var_5555_cast_fp16)[name = tensor<string, []>("op_5556_cast_fp16")];
+            tensor<int32, [4]> var_5577_perm_0 = const()[name = tensor<string, []>("op_5577_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_61 = const()[name = tensor<string, []>("tile_61"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5578_axis_0 = const()[name = tensor<string, []>("op_5578_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5577_cast_fp16 = transpose(perm = var_5577_perm_0, x = k_41_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_19 = split(axis = var_5578_axis_0, split_sizes = tile_61, x = var_5577_cast_fp16)[name = tensor<string, []>("op_5578_cast_fp16")];
+            tensor<int32, [20]> tile_62 = const()[name = tensor<string, []>("tile_62"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5599_axis_0 = const()[name = tensor<string, []>("op_5599_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_19 = split(axis = var_5599_axis_0, split_sizes = tile_62, x = var_5553_cast_fp16)[name = tensor<string, []>("op_5599_cast_fp16")];
+            tensor<string, []> aw_801_equation_0 = const()[name = tensor<string, []>("aw_801_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_801_cast_fp16 = einsum(equation = aw_801_equation_0, values = (var_5578_cast_fp16_0, var_5556_cast_fp16_0))[name = tensor<string, []>("aw_801_cast_fp16")];
+            tensor<string, []> aw_803_equation_0 = const()[name = tensor<string, []>("aw_803_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_803_cast_fp16 = einsum(equation = aw_803_equation_0, values = (var_5578_cast_fp16_1, var_5556_cast_fp16_1))[name = tensor<string, []>("aw_803_cast_fp16")];
+            tensor<string, []> aw_805_equation_0 = const()[name = tensor<string, []>("aw_805_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_805_cast_fp16 = einsum(equation = aw_805_equation_0, values = (var_5578_cast_fp16_2, var_5556_cast_fp16_2))[name = tensor<string, []>("aw_805_cast_fp16")];
+            tensor<string, []> aw_807_equation_0 = const()[name = tensor<string, []>("aw_807_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_807_cast_fp16 = einsum(equation = aw_807_equation_0, values = (var_5578_cast_fp16_3, var_5556_cast_fp16_3))[name = tensor<string, []>("aw_807_cast_fp16")];
+            tensor<string, []> aw_809_equation_0 = const()[name = tensor<string, []>("aw_809_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_809_cast_fp16 = einsum(equation = aw_809_equation_0, values = (var_5578_cast_fp16_4, var_5556_cast_fp16_4))[name = tensor<string, []>("aw_809_cast_fp16")];
+            tensor<string, []> aw_811_equation_0 = const()[name = tensor<string, []>("aw_811_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_811_cast_fp16 = einsum(equation = aw_811_equation_0, values = (var_5578_cast_fp16_5, var_5556_cast_fp16_5))[name = tensor<string, []>("aw_811_cast_fp16")];
+            tensor<string, []> aw_813_equation_0 = const()[name = tensor<string, []>("aw_813_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_813_cast_fp16 = einsum(equation = aw_813_equation_0, values = (var_5578_cast_fp16_6, var_5556_cast_fp16_6))[name = tensor<string, []>("aw_813_cast_fp16")];
+            tensor<string, []> aw_815_equation_0 = const()[name = tensor<string, []>("aw_815_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_815_cast_fp16 = einsum(equation = aw_815_equation_0, values = (var_5578_cast_fp16_7, var_5556_cast_fp16_7))[name = tensor<string, []>("aw_815_cast_fp16")];
+            tensor<string, []> aw_817_equation_0 = const()[name = tensor<string, []>("aw_817_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_817_cast_fp16 = einsum(equation = aw_817_equation_0, values = (var_5578_cast_fp16_8, var_5556_cast_fp16_8))[name = tensor<string, []>("aw_817_cast_fp16")];
+            tensor<string, []> aw_819_equation_0 = const()[name = tensor<string, []>("aw_819_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_819_cast_fp16 = einsum(equation = aw_819_equation_0, values = (var_5578_cast_fp16_9, var_5556_cast_fp16_9))[name = tensor<string, []>("aw_819_cast_fp16")];
+            tensor<string, []> aw_821_equation_0 = const()[name = tensor<string, []>("aw_821_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_821_cast_fp16 = einsum(equation = aw_821_equation_0, values = (var_5578_cast_fp16_10, var_5556_cast_fp16_10))[name = tensor<string, []>("aw_821_cast_fp16")];
+            tensor<string, []> aw_823_equation_0 = const()[name = tensor<string, []>("aw_823_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_823_cast_fp16 = einsum(equation = aw_823_equation_0, values = (var_5578_cast_fp16_11, var_5556_cast_fp16_11))[name = tensor<string, []>("aw_823_cast_fp16")];
+            tensor<string, []> aw_825_equation_0 = const()[name = tensor<string, []>("aw_825_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_825_cast_fp16 = einsum(equation = aw_825_equation_0, values = (var_5578_cast_fp16_12, var_5556_cast_fp16_12))[name = tensor<string, []>("aw_825_cast_fp16")];
+            tensor<string, []> aw_827_equation_0 = const()[name = tensor<string, []>("aw_827_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_827_cast_fp16 = einsum(equation = aw_827_equation_0, values = (var_5578_cast_fp16_13, var_5556_cast_fp16_13))[name = tensor<string, []>("aw_827_cast_fp16")];
+            tensor<string, []> aw_829_equation_0 = const()[name = tensor<string, []>("aw_829_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_829_cast_fp16 = einsum(equation = aw_829_equation_0, values = (var_5578_cast_fp16_14, var_5556_cast_fp16_14))[name = tensor<string, []>("aw_829_cast_fp16")];
+            tensor<string, []> aw_831_equation_0 = const()[name = tensor<string, []>("aw_831_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_831_cast_fp16 = einsum(equation = aw_831_equation_0, values = (var_5578_cast_fp16_15, var_5556_cast_fp16_15))[name = tensor<string, []>("aw_831_cast_fp16")];
+            tensor<string, []> aw_833_equation_0 = const()[name = tensor<string, []>("aw_833_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_833_cast_fp16 = einsum(equation = aw_833_equation_0, values = (var_5578_cast_fp16_16, var_5556_cast_fp16_16))[name = tensor<string, []>("aw_833_cast_fp16")];
+            tensor<string, []> aw_835_equation_0 = const()[name = tensor<string, []>("aw_835_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_835_cast_fp16 = einsum(equation = aw_835_equation_0, values = (var_5578_cast_fp16_17, var_5556_cast_fp16_17))[name = tensor<string, []>("aw_835_cast_fp16")];
+            tensor<string, []> aw_837_equation_0 = const()[name = tensor<string, []>("aw_837_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_837_cast_fp16 = einsum(equation = aw_837_equation_0, values = (var_5578_cast_fp16_18, var_5556_cast_fp16_18))[name = tensor<string, []>("aw_837_cast_fp16")];
+            tensor<string, []> aw_839_equation_0 = const()[name = tensor<string, []>("aw_839_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_839_cast_fp16 = einsum(equation = aw_839_equation_0, values = (var_5578_cast_fp16_19, var_5556_cast_fp16_19))[name = tensor<string, []>("aw_839_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5660_cast_fp16 = softmax(axis = var_5504, x = aw_801_cast_fp16)[name = tensor<string, []>("op_5660_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5661_cast_fp16 = softmax(axis = var_5504, x = aw_803_cast_fp16)[name = tensor<string, []>("op_5661_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5662_cast_fp16 = softmax(axis = var_5504, x = aw_805_cast_fp16)[name = tensor<string, []>("op_5662_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5663_cast_fp16 = softmax(axis = var_5504, x = aw_807_cast_fp16)[name = tensor<string, []>("op_5663_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5664_cast_fp16 = softmax(axis = var_5504, x = aw_809_cast_fp16)[name = tensor<string, []>("op_5664_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5665_cast_fp16 = softmax(axis = var_5504, x = aw_811_cast_fp16)[name = tensor<string, []>("op_5665_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5666_cast_fp16 = softmax(axis = var_5504, x = aw_813_cast_fp16)[name = tensor<string, []>("op_5666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5667_cast_fp16 = softmax(axis = var_5504, x = aw_815_cast_fp16)[name = tensor<string, []>("op_5667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5668_cast_fp16 = softmax(axis = var_5504, x = aw_817_cast_fp16)[name = tensor<string, []>("op_5668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5669_cast_fp16 = softmax(axis = var_5504, x = aw_819_cast_fp16)[name = tensor<string, []>("op_5669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5670_cast_fp16 = softmax(axis = var_5504, x = aw_821_cast_fp16)[name = tensor<string, []>("op_5670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5671_cast_fp16 = softmax(axis = var_5504, x = aw_823_cast_fp16)[name = tensor<string, []>("op_5671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5672_cast_fp16 = softmax(axis = var_5504, x = aw_825_cast_fp16)[name = tensor<string, []>("op_5672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5673_cast_fp16 = softmax(axis = var_5504, x = aw_827_cast_fp16)[name = tensor<string, []>("op_5673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5674_cast_fp16 = softmax(axis = var_5504, x = aw_829_cast_fp16)[name = tensor<string, []>("op_5674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5675_cast_fp16 = softmax(axis = var_5504, x = aw_831_cast_fp16)[name = tensor<string, []>("op_5675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5676_cast_fp16 = softmax(axis = var_5504, x = aw_833_cast_fp16)[name = tensor<string, []>("op_5676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5677_cast_fp16 = softmax(axis = var_5504, x = aw_835_cast_fp16)[name = tensor<string, []>("op_5677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5678_cast_fp16 = softmax(axis = var_5504, x = aw_837_cast_fp16)[name = tensor<string, []>("op_5678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5679_cast_fp16 = softmax(axis = var_5504, x = aw_839_cast_fp16)[name = tensor<string, []>("op_5679_cast_fp16")];
+            tensor<string, []> var_5681_equation_0 = const()[name = tensor<string, []>("op_5681_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5681_cast_fp16 = einsum(equation = var_5681_equation_0, values = (var_5599_cast_fp16_0, var_5660_cast_fp16))[name = tensor<string, []>("op_5681_cast_fp16")];
+            tensor<string, []> var_5683_equation_0 = const()[name = tensor<string, []>("op_5683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5683_cast_fp16 = einsum(equation = var_5683_equation_0, values = (var_5599_cast_fp16_1, var_5661_cast_fp16))[name = tensor<string, []>("op_5683_cast_fp16")];
+            tensor<string, []> var_5685_equation_0 = const()[name = tensor<string, []>("op_5685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5685_cast_fp16 = einsum(equation = var_5685_equation_0, values = (var_5599_cast_fp16_2, var_5662_cast_fp16))[name = tensor<string, []>("op_5685_cast_fp16")];
+            tensor<string, []> var_5687_equation_0 = const()[name = tensor<string, []>("op_5687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5687_cast_fp16 = einsum(equation = var_5687_equation_0, values = (var_5599_cast_fp16_3, var_5663_cast_fp16))[name = tensor<string, []>("op_5687_cast_fp16")];
+            tensor<string, []> var_5689_equation_0 = const()[name = tensor<string, []>("op_5689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5689_cast_fp16 = einsum(equation = var_5689_equation_0, values = (var_5599_cast_fp16_4, var_5664_cast_fp16))[name = tensor<string, []>("op_5689_cast_fp16")];
+            tensor<string, []> var_5691_equation_0 = const()[name = tensor<string, []>("op_5691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5691_cast_fp16 = einsum(equation = var_5691_equation_0, values = (var_5599_cast_fp16_5, var_5665_cast_fp16))[name = tensor<string, []>("op_5691_cast_fp16")];
+            tensor<string, []> var_5693_equation_0 = const()[name = tensor<string, []>("op_5693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5693_cast_fp16 = einsum(equation = var_5693_equation_0, values = (var_5599_cast_fp16_6, var_5666_cast_fp16))[name = tensor<string, []>("op_5693_cast_fp16")];
+            tensor<string, []> var_5695_equation_0 = const()[name = tensor<string, []>("op_5695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5695_cast_fp16 = einsum(equation = var_5695_equation_0, values = (var_5599_cast_fp16_7, var_5667_cast_fp16))[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<string, []> var_5697_equation_0 = const()[name = tensor<string, []>("op_5697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5697_cast_fp16 = einsum(equation = var_5697_equation_0, values = (var_5599_cast_fp16_8, var_5668_cast_fp16))[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<string, []> var_5699_equation_0 = const()[name = tensor<string, []>("op_5699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5699_cast_fp16 = einsum(equation = var_5699_equation_0, values = (var_5599_cast_fp16_9, var_5669_cast_fp16))[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<string, []> var_5701_equation_0 = const()[name = tensor<string, []>("op_5701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5701_cast_fp16 = einsum(equation = var_5701_equation_0, values = (var_5599_cast_fp16_10, var_5670_cast_fp16))[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<string, []> var_5703_equation_0 = const()[name = tensor<string, []>("op_5703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5703_cast_fp16 = einsum(equation = var_5703_equation_0, values = (var_5599_cast_fp16_11, var_5671_cast_fp16))[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<string, []> var_5705_equation_0 = const()[name = tensor<string, []>("op_5705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5705_cast_fp16 = einsum(equation = var_5705_equation_0, values = (var_5599_cast_fp16_12, var_5672_cast_fp16))[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<string, []> var_5707_equation_0 = const()[name = tensor<string, []>("op_5707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5707_cast_fp16 = einsum(equation = var_5707_equation_0, values = (var_5599_cast_fp16_13, var_5673_cast_fp16))[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<string, []> var_5709_equation_0 = const()[name = tensor<string, []>("op_5709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5709_cast_fp16 = einsum(equation = var_5709_equation_0, values = (var_5599_cast_fp16_14, var_5674_cast_fp16))[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<string, []> var_5711_equation_0 = const()[name = tensor<string, []>("op_5711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5711_cast_fp16 = einsum(equation = var_5711_equation_0, values = (var_5599_cast_fp16_15, var_5675_cast_fp16))[name = tensor<string, []>("op_5711_cast_fp16")];
+            tensor<string, []> var_5713_equation_0 = const()[name = tensor<string, []>("op_5713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5713_cast_fp16 = einsum(equation = var_5713_equation_0, values = (var_5599_cast_fp16_16, var_5676_cast_fp16))[name = tensor<string, []>("op_5713_cast_fp16")];
+            tensor<string, []> var_5715_equation_0 = const()[name = tensor<string, []>("op_5715_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5715_cast_fp16 = einsum(equation = var_5715_equation_0, values = (var_5599_cast_fp16_17, var_5677_cast_fp16))[name = tensor<string, []>("op_5715_cast_fp16")];
+            tensor<string, []> var_5717_equation_0 = const()[name = tensor<string, []>("op_5717_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5717_cast_fp16 = einsum(equation = var_5717_equation_0, values = (var_5599_cast_fp16_18, var_5678_cast_fp16))[name = tensor<string, []>("op_5717_cast_fp16")];
+            tensor<string, []> var_5719_equation_0 = const()[name = tensor<string, []>("op_5719_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5719_cast_fp16 = einsum(equation = var_5719_equation_0, values = (var_5599_cast_fp16_19, var_5679_cast_fp16))[name = tensor<string, []>("op_5719_cast_fp16")];
+            tensor<bool, []> input_205_interleave_0 = const()[name = tensor<string, []>("input_205_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_205_cast_fp16 = concat(axis = var_5504, interleave = input_205_interleave_0, values = (var_5681_cast_fp16, var_5683_cast_fp16, var_5685_cast_fp16, var_5687_cast_fp16, var_5689_cast_fp16, var_5691_cast_fp16, var_5693_cast_fp16, var_5695_cast_fp16, var_5697_cast_fp16, var_5699_cast_fp16, var_5701_cast_fp16, var_5703_cast_fp16, var_5705_cast_fp16, var_5707_cast_fp16, var_5709_cast_fp16, var_5711_cast_fp16, var_5713_cast_fp16, var_5715_cast_fp16, var_5717_cast_fp16, var_5719_cast_fp16))[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<string, []> var_5728_pad_type_0 = const()[name = tensor<string, []>("op_5728_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5728_strides_0 = const()[name = tensor<string, []>("op_5728_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5728_pad_0 = const()[name = tensor<string, []>("op_5728_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5728_dilations_0 = const()[name = tensor<string, []>("op_5728_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5728_groups_0 = const()[name = tensor<string, []>("op_5728_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811196992)))];
+            tensor<fp16, [1280]> blocks_20_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814473856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5728_cast_fp16 = conv(bias = blocks_20_attn_out_bias_to_fp16, dilations = var_5728_dilations_0, groups = var_5728_groups_0, pad = var_5728_pad_0, pad_type = var_5728_pad_type_0, strides = var_5728_strides_0, weight = blocks_20_attn_out_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = var_5728_cast_fp16)[name = tensor<string, []>("inputs_83_cast_fp16")];
+            tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_207_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_207_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814476480)))];
+            tensor<fp16, [1280]> input_207_beta_0_to_fp16 = const()[name = tensor<string, []>("input_207_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814479104)))];
+            tensor<fp16, []> var_5738_to_fp16 = const()[name = tensor<string, []>("op_5738_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_207_cast_fp16 = layer_norm(axes = input_207_axes_0, beta = input_207_beta_0_to_fp16, epsilon = var_5738_to_fp16, gamma = input_207_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<string, []> input_209_pad_type_0 = const()[name = tensor<string, []>("input_209_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_209_strides_0 = const()[name = tensor<string, []>("input_209_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_209_dilations_0 = const()[name = tensor<string, []>("input_209_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_209_groups_0 = const()[name = tensor<string, []>("input_209_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_20_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814481728)))];
+            tensor<fp16, [5120]> blocks_20_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827588992)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_209_cast_fp16 = conv(bias = blocks_20_mlp_0_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = blocks_20_mlp_0_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<string, []> input_211_mode_0 = const()[name = tensor<string, []>("input_211_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_211_cast_fp16 = gelu(mode = input_211_mode_0, x = input_209_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<string, []> var_5764_pad_type_0 = const()[name = tensor<string, []>("op_5764_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5764_strides_0 = const()[name = tensor<string, []>("op_5764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5764_pad_0 = const()[name = tensor<string, []>("op_5764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5764_dilations_0 = const()[name = tensor<string, []>("op_5764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5764_groups_0 = const()[name = tensor<string, []>("op_5764_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_20_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827599296)))];
+            tensor<fp16, [1280]> blocks_20_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840706560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5764_cast_fp16 = conv(bias = blocks_20_mlp_2_bias_to_fp16, dilations = var_5764_dilations_0, groups = var_5764_groups_0, pad = var_5764_pad_0, pad_type = var_5764_pad_type_0, strides = var_5764_strides_0, weight = blocks_20_mlp_2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor<string, []>("op_5764_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = var_5764_cast_fp16)[name = tensor<string, []>("inputs_85_cast_fp16")];
+            tensor<int32, []> var_5773 = const()[name = tensor<string, []>("op_5773"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_213_axes_0 = const()[name = tensor<string, []>("input_213_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_213_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_213_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840709184)))];
+            tensor<fp16, [1280]> input_213_beta_0_to_fp16 = const()[name = tensor<string, []>("input_213_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840711808)))];
+            tensor<fp16, []> var_5789_to_fp16 = const()[name = tensor<string, []>("op_5789_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_213_cast_fp16 = layer_norm(axes = input_213_axes_0, beta = input_213_beta_0_to_fp16, epsilon = var_5789_to_fp16, gamma = input_213_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<string, []> q_43_pad_type_0 = const()[name = tensor<string, []>("q_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_43_strides_0 = const()[name = tensor<string, []>("q_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_43_pad_0 = const()[name = tensor<string, []>("q_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_43_dilations_0 = const()[name = tensor<string, []>("q_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_43_groups_0 = const()[name = tensor<string, []>("q_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5824_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5824_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(840714432)))];
+            tensor<fp16, [1280]> var_5824_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5824_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(843991296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5824_cast_fp16 = conv(bias = var_5824_bias_0_to_fp16, dilations = q_43_dilations_0, groups = q_43_groups_0, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = q_43_strides_0, weight = var_5824_weight_0_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5824_cast_fp16")];
+            tensor<string, []> k_43_pad_type_0 = const()[name = tensor<string, []>("k_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_43_strides_0 = const()[name = tensor<string, []>("k_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_43_pad_0 = const()[name = tensor<string, []>("k_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_43_dilations_0 = const()[name = tensor<string, []>("k_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_43_groups_0 = const()[name = tensor<string, []>("k_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(843993920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_43_cast_fp16 = conv(dilations = k_43_dilations_0, groups = k_43_groups_0, pad = k_43_pad_0, pad_type = k_43_pad_type_0, strides = k_43_strides_0, weight = blocks_21_attn_key_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("k_43_cast_fp16")];
+            tensor<string, []> var_5822_pad_type_0 = const()[name = tensor<string, []>("op_5822_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5822_strides_0 = const()[name = tensor<string, []>("op_5822_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5822_pad_0 = const()[name = tensor<string, []>("op_5822_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5822_dilations_0 = const()[name = tensor<string, []>("op_5822_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5822_groups_0 = const()[name = tensor<string, []>("op_5822_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(847270784)))];
+            tensor<fp16, [1280]> blocks_21_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850547648)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5822_cast_fp16 = conv(bias = blocks_21_attn_value_bias_to_fp16, dilations = var_5822_dilations_0, groups = var_5822_groups_0, pad = var_5822_pad_0, pad_type = var_5822_pad_type_0, strides = var_5822_strides_0, weight = blocks_21_attn_value_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5822_cast_fp16")];
+            tensor<int32, [20]> tile_63 = const()[name = tensor<string, []>("tile_63"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5825_axis_0 = const()[name = tensor<string, []>("op_5825_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_19 = split(axis = var_5825_axis_0, split_sizes = tile_63, x = var_5824_cast_fp16)[name = tensor<string, []>("op_5825_cast_fp16")];
+            tensor<int32, [4]> var_5846_perm_0 = const()[name = tensor<string, []>("op_5846_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_64 = const()[name = tensor<string, []>("tile_64"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5847_axis_0 = const()[name = tensor<string, []>("op_5847_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5846_cast_fp16 = transpose(perm = var_5846_perm_0, x = k_43_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_19 = split(axis = var_5847_axis_0, split_sizes = tile_64, x = var_5846_cast_fp16)[name = tensor<string, []>("op_5847_cast_fp16")];
+            tensor<int32, [20]> tile_65 = const()[name = tensor<string, []>("tile_65"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5868_axis_0 = const()[name = tensor<string, []>("op_5868_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_19 = split(axis = var_5868_axis_0, split_sizes = tile_65, x = var_5822_cast_fp16)[name = tensor<string, []>("op_5868_cast_fp16")];
+            tensor<string, []> aw_841_equation_0 = const()[name = tensor<string, []>("aw_841_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_841_cast_fp16 = einsum(equation = aw_841_equation_0, values = (var_5847_cast_fp16_0, var_5825_cast_fp16_0))[name = tensor<string, []>("aw_841_cast_fp16")];
+            tensor<string, []> aw_843_equation_0 = const()[name = tensor<string, []>("aw_843_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_843_cast_fp16 = einsum(equation = aw_843_equation_0, values = (var_5847_cast_fp16_1, var_5825_cast_fp16_1))[name = tensor<string, []>("aw_843_cast_fp16")];
+            tensor<string, []> aw_845_equation_0 = const()[name = tensor<string, []>("aw_845_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_845_cast_fp16 = einsum(equation = aw_845_equation_0, values = (var_5847_cast_fp16_2, var_5825_cast_fp16_2))[name = tensor<string, []>("aw_845_cast_fp16")];
+            tensor<string, []> aw_847_equation_0 = const()[name = tensor<string, []>("aw_847_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_847_cast_fp16 = einsum(equation = aw_847_equation_0, values = (var_5847_cast_fp16_3, var_5825_cast_fp16_3))[name = tensor<string, []>("aw_847_cast_fp16")];
+            tensor<string, []> aw_849_equation_0 = const()[name = tensor<string, []>("aw_849_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_849_cast_fp16 = einsum(equation = aw_849_equation_0, values = (var_5847_cast_fp16_4, var_5825_cast_fp16_4))[name = tensor<string, []>("aw_849_cast_fp16")];
+            tensor<string, []> aw_851_equation_0 = const()[name = tensor<string, []>("aw_851_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_851_cast_fp16 = einsum(equation = aw_851_equation_0, values = (var_5847_cast_fp16_5, var_5825_cast_fp16_5))[name = tensor<string, []>("aw_851_cast_fp16")];
+            tensor<string, []> aw_853_equation_0 = const()[name = tensor<string, []>("aw_853_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_853_cast_fp16 = einsum(equation = aw_853_equation_0, values = (var_5847_cast_fp16_6, var_5825_cast_fp16_6))[name = tensor<string, []>("aw_853_cast_fp16")];
+            tensor<string, []> aw_855_equation_0 = const()[name = tensor<string, []>("aw_855_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_855_cast_fp16 = einsum(equation = aw_855_equation_0, values = (var_5847_cast_fp16_7, var_5825_cast_fp16_7))[name = tensor<string, []>("aw_855_cast_fp16")];
+            tensor<string, []> aw_857_equation_0 = const()[name = tensor<string, []>("aw_857_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_857_cast_fp16 = einsum(equation = aw_857_equation_0, values = (var_5847_cast_fp16_8, var_5825_cast_fp16_8))[name = tensor<string, []>("aw_857_cast_fp16")];
+            tensor<string, []> aw_859_equation_0 = const()[name = tensor<string, []>("aw_859_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_859_cast_fp16 = einsum(equation = aw_859_equation_0, values = (var_5847_cast_fp16_9, var_5825_cast_fp16_9))[name = tensor<string, []>("aw_859_cast_fp16")];
+            tensor<string, []> aw_861_equation_0 = const()[name = tensor<string, []>("aw_861_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_861_cast_fp16 = einsum(equation = aw_861_equation_0, values = (var_5847_cast_fp16_10, var_5825_cast_fp16_10))[name = tensor<string, []>("aw_861_cast_fp16")];
+            tensor<string, []> aw_863_equation_0 = const()[name = tensor<string, []>("aw_863_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_863_cast_fp16 = einsum(equation = aw_863_equation_0, values = (var_5847_cast_fp16_11, var_5825_cast_fp16_11))[name = tensor<string, []>("aw_863_cast_fp16")];
+            tensor<string, []> aw_865_equation_0 = const()[name = tensor<string, []>("aw_865_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_865_cast_fp16 = einsum(equation = aw_865_equation_0, values = (var_5847_cast_fp16_12, var_5825_cast_fp16_12))[name = tensor<string, []>("aw_865_cast_fp16")];
+            tensor<string, []> aw_867_equation_0 = const()[name = tensor<string, []>("aw_867_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_867_cast_fp16 = einsum(equation = aw_867_equation_0, values = (var_5847_cast_fp16_13, var_5825_cast_fp16_13))[name = tensor<string, []>("aw_867_cast_fp16")];
+            tensor<string, []> aw_869_equation_0 = const()[name = tensor<string, []>("aw_869_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_869_cast_fp16 = einsum(equation = aw_869_equation_0, values = (var_5847_cast_fp16_14, var_5825_cast_fp16_14))[name = tensor<string, []>("aw_869_cast_fp16")];
+            tensor<string, []> aw_871_equation_0 = const()[name = tensor<string, []>("aw_871_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_871_cast_fp16 = einsum(equation = aw_871_equation_0, values = (var_5847_cast_fp16_15, var_5825_cast_fp16_15))[name = tensor<string, []>("aw_871_cast_fp16")];
+            tensor<string, []> aw_873_equation_0 = const()[name = tensor<string, []>("aw_873_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_873_cast_fp16 = einsum(equation = aw_873_equation_0, values = (var_5847_cast_fp16_16, var_5825_cast_fp16_16))[name = tensor<string, []>("aw_873_cast_fp16")];
+            tensor<string, []> aw_875_equation_0 = const()[name = tensor<string, []>("aw_875_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_875_cast_fp16 = einsum(equation = aw_875_equation_0, values = (var_5847_cast_fp16_17, var_5825_cast_fp16_17))[name = tensor<string, []>("aw_875_cast_fp16")];
+            tensor<string, []> aw_877_equation_0 = const()[name = tensor<string, []>("aw_877_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_877_cast_fp16 = einsum(equation = aw_877_equation_0, values = (var_5847_cast_fp16_18, var_5825_cast_fp16_18))[name = tensor<string, []>("aw_877_cast_fp16")];
+            tensor<string, []> aw_879_equation_0 = const()[name = tensor<string, []>("aw_879_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_879_cast_fp16 = einsum(equation = aw_879_equation_0, values = (var_5847_cast_fp16_19, var_5825_cast_fp16_19))[name = tensor<string, []>("aw_879_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5929_cast_fp16 = softmax(axis = var_5773, x = aw_841_cast_fp16)[name = tensor<string, []>("op_5929_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5930_cast_fp16 = softmax(axis = var_5773, x = aw_843_cast_fp16)[name = tensor<string, []>("op_5930_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5931_cast_fp16 = softmax(axis = var_5773, x = aw_845_cast_fp16)[name = tensor<string, []>("op_5931_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5932_cast_fp16 = softmax(axis = var_5773, x = aw_847_cast_fp16)[name = tensor<string, []>("op_5932_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5933_cast_fp16 = softmax(axis = var_5773, x = aw_849_cast_fp16)[name = tensor<string, []>("op_5933_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5934_cast_fp16 = softmax(axis = var_5773, x = aw_851_cast_fp16)[name = tensor<string, []>("op_5934_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5935_cast_fp16 = softmax(axis = var_5773, x = aw_853_cast_fp16)[name = tensor<string, []>("op_5935_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5936_cast_fp16 = softmax(axis = var_5773, x = aw_855_cast_fp16)[name = tensor<string, []>("op_5936_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5937_cast_fp16 = softmax(axis = var_5773, x = aw_857_cast_fp16)[name = tensor<string, []>("op_5937_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5938_cast_fp16 = softmax(axis = var_5773, x = aw_859_cast_fp16)[name = tensor<string, []>("op_5938_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5939_cast_fp16 = softmax(axis = var_5773, x = aw_861_cast_fp16)[name = tensor<string, []>("op_5939_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5940_cast_fp16 = softmax(axis = var_5773, x = aw_863_cast_fp16)[name = tensor<string, []>("op_5940_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5941_cast_fp16 = softmax(axis = var_5773, x = aw_865_cast_fp16)[name = tensor<string, []>("op_5941_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5942_cast_fp16 = softmax(axis = var_5773, x = aw_867_cast_fp16)[name = tensor<string, []>("op_5942_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5943_cast_fp16 = softmax(axis = var_5773, x = aw_869_cast_fp16)[name = tensor<string, []>("op_5943_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5944_cast_fp16 = softmax(axis = var_5773, x = aw_871_cast_fp16)[name = tensor<string, []>("op_5944_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5945_cast_fp16 = softmax(axis = var_5773, x = aw_873_cast_fp16)[name = tensor<string, []>("op_5945_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5946_cast_fp16 = softmax(axis = var_5773, x = aw_875_cast_fp16)[name = tensor<string, []>("op_5946_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5947_cast_fp16 = softmax(axis = var_5773, x = aw_877_cast_fp16)[name = tensor<string, []>("op_5947_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5948_cast_fp16 = softmax(axis = var_5773, x = aw_879_cast_fp16)[name = tensor<string, []>("op_5948_cast_fp16")];
+            tensor<string, []> var_5950_equation_0 = const()[name = tensor<string, []>("op_5950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5950_cast_fp16 = einsum(equation = var_5950_equation_0, values = (var_5868_cast_fp16_0, var_5929_cast_fp16))[name = tensor<string, []>("op_5950_cast_fp16")];
+            tensor<string, []> var_5952_equation_0 = const()[name = tensor<string, []>("op_5952_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5952_cast_fp16 = einsum(equation = var_5952_equation_0, values = (var_5868_cast_fp16_1, var_5930_cast_fp16))[name = tensor<string, []>("op_5952_cast_fp16")];
+            tensor<string, []> var_5954_equation_0 = const()[name = tensor<string, []>("op_5954_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5954_cast_fp16 = einsum(equation = var_5954_equation_0, values = (var_5868_cast_fp16_2, var_5931_cast_fp16))[name = tensor<string, []>("op_5954_cast_fp16")];
+            tensor<string, []> var_5956_equation_0 = const()[name = tensor<string, []>("op_5956_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5956_cast_fp16 = einsum(equation = var_5956_equation_0, values = (var_5868_cast_fp16_3, var_5932_cast_fp16))[name = tensor<string, []>("op_5956_cast_fp16")];
+            tensor<string, []> var_5958_equation_0 = const()[name = tensor<string, []>("op_5958_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5958_cast_fp16 = einsum(equation = var_5958_equation_0, values = (var_5868_cast_fp16_4, var_5933_cast_fp16))[name = tensor<string, []>("op_5958_cast_fp16")];
+            tensor<string, []> var_5960_equation_0 = const()[name = tensor<string, []>("op_5960_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5960_cast_fp16 = einsum(equation = var_5960_equation_0, values = (var_5868_cast_fp16_5, var_5934_cast_fp16))[name = tensor<string, []>("op_5960_cast_fp16")];
+            tensor<string, []> var_5962_equation_0 = const()[name = tensor<string, []>("op_5962_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5962_cast_fp16 = einsum(equation = var_5962_equation_0, values = (var_5868_cast_fp16_6, var_5935_cast_fp16))[name = tensor<string, []>("op_5962_cast_fp16")];
+            tensor<string, []> var_5964_equation_0 = const()[name = tensor<string, []>("op_5964_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5964_cast_fp16 = einsum(equation = var_5964_equation_0, values = (var_5868_cast_fp16_7, var_5936_cast_fp16))[name = tensor<string, []>("op_5964_cast_fp16")];
+            tensor<string, []> var_5966_equation_0 = const()[name = tensor<string, []>("op_5966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5966_cast_fp16 = einsum(equation = var_5966_equation_0, values = (var_5868_cast_fp16_8, var_5937_cast_fp16))[name = tensor<string, []>("op_5966_cast_fp16")];
+            tensor<string, []> var_5968_equation_0 = const()[name = tensor<string, []>("op_5968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5968_cast_fp16 = einsum(equation = var_5968_equation_0, values = (var_5868_cast_fp16_9, var_5938_cast_fp16))[name = tensor<string, []>("op_5968_cast_fp16")];
+            tensor<string, []> var_5970_equation_0 = const()[name = tensor<string, []>("op_5970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5970_cast_fp16 = einsum(equation = var_5970_equation_0, values = (var_5868_cast_fp16_10, var_5939_cast_fp16))[name = tensor<string, []>("op_5970_cast_fp16")];
+            tensor<string, []> var_5972_equation_0 = const()[name = tensor<string, []>("op_5972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5972_cast_fp16 = einsum(equation = var_5972_equation_0, values = (var_5868_cast_fp16_11, var_5940_cast_fp16))[name = tensor<string, []>("op_5972_cast_fp16")];
+            tensor<string, []> var_5974_equation_0 = const()[name = tensor<string, []>("op_5974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5974_cast_fp16 = einsum(equation = var_5974_equation_0, values = (var_5868_cast_fp16_12, var_5941_cast_fp16))[name = tensor<string, []>("op_5974_cast_fp16")];
+            tensor<string, []> var_5976_equation_0 = const()[name = tensor<string, []>("op_5976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5976_cast_fp16 = einsum(equation = var_5976_equation_0, values = (var_5868_cast_fp16_13, var_5942_cast_fp16))[name = tensor<string, []>("op_5976_cast_fp16")];
+            tensor<string, []> var_5978_equation_0 = const()[name = tensor<string, []>("op_5978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5978_cast_fp16 = einsum(equation = var_5978_equation_0, values = (var_5868_cast_fp16_14, var_5943_cast_fp16))[name = tensor<string, []>("op_5978_cast_fp16")];
+            tensor<string, []> var_5980_equation_0 = const()[name = tensor<string, []>("op_5980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5980_cast_fp16 = einsum(equation = var_5980_equation_0, values = (var_5868_cast_fp16_15, var_5944_cast_fp16))[name = tensor<string, []>("op_5980_cast_fp16")];
+            tensor<string, []> var_5982_equation_0 = const()[name = tensor<string, []>("op_5982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5982_cast_fp16 = einsum(equation = var_5982_equation_0, values = (var_5868_cast_fp16_16, var_5945_cast_fp16))[name = tensor<string, []>("op_5982_cast_fp16")];
+            tensor<string, []> var_5984_equation_0 = const()[name = tensor<string, []>("op_5984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5984_cast_fp16 = einsum(equation = var_5984_equation_0, values = (var_5868_cast_fp16_17, var_5946_cast_fp16))[name = tensor<string, []>("op_5984_cast_fp16")];
+            tensor<string, []> var_5986_equation_0 = const()[name = tensor<string, []>("op_5986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5986_cast_fp16 = einsum(equation = var_5986_equation_0, values = (var_5868_cast_fp16_18, var_5947_cast_fp16))[name = tensor<string, []>("op_5986_cast_fp16")];
+            tensor<string, []> var_5988_equation_0 = const()[name = tensor<string, []>("op_5988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5988_cast_fp16 = einsum(equation = var_5988_equation_0, values = (var_5868_cast_fp16_19, var_5948_cast_fp16))[name = tensor<string, []>("op_5988_cast_fp16")];
+            tensor<bool, []> input_215_interleave_0 = const()[name = tensor<string, []>("input_215_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_215_cast_fp16 = concat(axis = var_5773, interleave = input_215_interleave_0, values = (var_5950_cast_fp16, var_5952_cast_fp16, var_5954_cast_fp16, var_5956_cast_fp16, var_5958_cast_fp16, var_5960_cast_fp16, var_5962_cast_fp16, var_5964_cast_fp16, var_5966_cast_fp16, var_5968_cast_fp16, var_5970_cast_fp16, var_5972_cast_fp16, var_5974_cast_fp16, var_5976_cast_fp16, var_5978_cast_fp16, var_5980_cast_fp16, var_5982_cast_fp16, var_5984_cast_fp16, var_5986_cast_fp16, var_5988_cast_fp16))[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<string, []> var_5997_pad_type_0 = const()[name = tensor<string, []>("op_5997_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5997_strides_0 = const()[name = tensor<string, []>("op_5997_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5997_pad_0 = const()[name = tensor<string, []>("op_5997_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5997_dilations_0 = const()[name = tensor<string, []>("op_5997_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5997_groups_0 = const()[name = tensor<string, []>("op_5997_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850550272)))];
+            tensor<fp16, [1280]> blocks_21_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853827136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5997_cast_fp16 = conv(bias = blocks_21_attn_out_bias_to_fp16, dilations = var_5997_dilations_0, groups = var_5997_groups_0, pad = var_5997_pad_0, pad_type = var_5997_pad_type_0, strides = var_5997_strides_0, weight = blocks_21_attn_out_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("op_5997_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = var_5997_cast_fp16)[name = tensor<string, []>("inputs_87_cast_fp16")];
+            tensor<int32, [1]> input_217_axes_0 = const()[name = tensor<string, []>("input_217_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_217_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_217_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853829760)))];
+            tensor<fp16, [1280]> input_217_beta_0_to_fp16 = const()[name = tensor<string, []>("input_217_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853832384)))];
+            tensor<fp16, []> var_6007_to_fp16 = const()[name = tensor<string, []>("op_6007_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_217_cast_fp16 = layer_norm(axes = input_217_axes_0, beta = input_217_beta_0_to_fp16, epsilon = var_6007_to_fp16, gamma = input_217_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_21_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(853835008)))];
+            tensor<fp16, [5120]> blocks_21_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(866942272)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_219_cast_fp16 = conv(bias = blocks_21_mlp_0_bias_to_fp16, dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = blocks_21_mlp_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
+            tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
+            tensor<string, []> var_6033_pad_type_0 = const()[name = tensor<string, []>("op_6033_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6033_strides_0 = const()[name = tensor<string, []>("op_6033_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6033_pad_0 = const()[name = tensor<string, []>("op_6033_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6033_dilations_0 = const()[name = tensor<string, []>("op_6033_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6033_groups_0 = const()[name = tensor<string, []>("op_6033_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_21_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(866952576)))];
+            tensor<fp16, [1280]> blocks_21_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880059840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6033_cast_fp16 = conv(bias = blocks_21_mlp_2_bias_to_fp16, dilations = var_6033_dilations_0, groups = var_6033_groups_0, pad = var_6033_pad_0, pad_type = var_6033_pad_type_0, strides = var_6033_strides_0, weight = blocks_21_mlp_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("op_6033_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_6033_cast_fp16)[name = tensor<string, []>("inputs_89_cast_fp16")];
+            tensor<int32, []> var_6042 = const()[name = tensor<string, []>("op_6042"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_223_axes_0 = const()[name = tensor<string, []>("input_223_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_223_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_223_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880062464)))];
+            tensor<fp16, [1280]> input_223_beta_0_to_fp16 = const()[name = tensor<string, []>("input_223_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880065088)))];
+            tensor<fp16, []> var_6058_to_fp16 = const()[name = tensor<string, []>("op_6058_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = input_223_beta_0_to_fp16, epsilon = var_6058_to_fp16, gamma = input_223_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
+            tensor<string, []> q_45_pad_type_0 = const()[name = tensor<string, []>("q_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_45_strides_0 = const()[name = tensor<string, []>("q_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_45_pad_0 = const()[name = tensor<string, []>("q_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_45_dilations_0 = const()[name = tensor<string, []>("q_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_45_groups_0 = const()[name = tensor<string, []>("q_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6093_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6093_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880067712)))];
+            tensor<fp16, [1280]> var_6093_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6093_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883344576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6093_cast_fp16 = conv(bias = var_6093_bias_0_to_fp16, dilations = q_45_dilations_0, groups = q_45_groups_0, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = q_45_strides_0, weight = var_6093_weight_0_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6093_cast_fp16")];
+            tensor<string, []> k_45_pad_type_0 = const()[name = tensor<string, []>("k_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_45_strides_0 = const()[name = tensor<string, []>("k_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_45_pad_0 = const()[name = tensor<string, []>("k_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_45_dilations_0 = const()[name = tensor<string, []>("k_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_45_groups_0 = const()[name = tensor<string, []>("k_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883347200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_45_cast_fp16 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = blocks_22_attn_key_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
+            tensor<string, []> var_6091_pad_type_0 = const()[name = tensor<string, []>("op_6091_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6091_strides_0 = const()[name = tensor<string, []>("op_6091_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6091_pad_0 = const()[name = tensor<string, []>("op_6091_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6091_dilations_0 = const()[name = tensor<string, []>("op_6091_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6091_groups_0 = const()[name = tensor<string, []>("op_6091_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(886624064)))];
+            tensor<fp16, [1280]> blocks_22_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(889900928)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6091_cast_fp16 = conv(bias = blocks_22_attn_value_bias_to_fp16, dilations = var_6091_dilations_0, groups = var_6091_groups_0, pad = var_6091_pad_0, pad_type = var_6091_pad_type_0, strides = var_6091_strides_0, weight = blocks_22_attn_value_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6091_cast_fp16")];
+            tensor<int32, [20]> tile_66 = const()[name = tensor<string, []>("tile_66"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6094_axis_0 = const()[name = tensor<string, []>("op_6094_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_19 = split(axis = var_6094_axis_0, split_sizes = tile_66, x = var_6093_cast_fp16)[name = tensor<string, []>("op_6094_cast_fp16")];
+            tensor<int32, [4]> var_6115_perm_0 = const()[name = tensor<string, []>("op_6115_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_67 = const()[name = tensor<string, []>("tile_67"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6116_axis_0 = const()[name = tensor<string, []>("op_6116_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6115_cast_fp16 = transpose(perm = var_6115_perm_0, x = k_45_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_19 = split(axis = var_6116_axis_0, split_sizes = tile_67, x = var_6115_cast_fp16)[name = tensor<string, []>("op_6116_cast_fp16")];
+            tensor<int32, [20]> tile_68 = const()[name = tensor<string, []>("tile_68"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6137_axis_0 = const()[name = tensor<string, []>("op_6137_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_19 = split(axis = var_6137_axis_0, split_sizes = tile_68, x = var_6091_cast_fp16)[name = tensor<string, []>("op_6137_cast_fp16")];
+            tensor<string, []> aw_881_equation_0 = const()[name = tensor<string, []>("aw_881_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_881_cast_fp16 = einsum(equation = aw_881_equation_0, values = (var_6116_cast_fp16_0, var_6094_cast_fp16_0))[name = tensor<string, []>("aw_881_cast_fp16")];
+            tensor<string, []> aw_883_equation_0 = const()[name = tensor<string, []>("aw_883_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_883_cast_fp16 = einsum(equation = aw_883_equation_0, values = (var_6116_cast_fp16_1, var_6094_cast_fp16_1))[name = tensor<string, []>("aw_883_cast_fp16")];
+            tensor<string, []> aw_885_equation_0 = const()[name = tensor<string, []>("aw_885_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_885_cast_fp16 = einsum(equation = aw_885_equation_0, values = (var_6116_cast_fp16_2, var_6094_cast_fp16_2))[name = tensor<string, []>("aw_885_cast_fp16")];
+            tensor<string, []> aw_887_equation_0 = const()[name = tensor<string, []>("aw_887_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_887_cast_fp16 = einsum(equation = aw_887_equation_0, values = (var_6116_cast_fp16_3, var_6094_cast_fp16_3))[name = tensor<string, []>("aw_887_cast_fp16")];
+            tensor<string, []> aw_889_equation_0 = const()[name = tensor<string, []>("aw_889_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_889_cast_fp16 = einsum(equation = aw_889_equation_0, values = (var_6116_cast_fp16_4, var_6094_cast_fp16_4))[name = tensor<string, []>("aw_889_cast_fp16")];
+            tensor<string, []> aw_891_equation_0 = const()[name = tensor<string, []>("aw_891_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_891_cast_fp16 = einsum(equation = aw_891_equation_0, values = (var_6116_cast_fp16_5, var_6094_cast_fp16_5))[name = tensor<string, []>("aw_891_cast_fp16")];
+            tensor<string, []> aw_893_equation_0 = const()[name = tensor<string, []>("aw_893_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_893_cast_fp16 = einsum(equation = aw_893_equation_0, values = (var_6116_cast_fp16_6, var_6094_cast_fp16_6))[name = tensor<string, []>("aw_893_cast_fp16")];
+            tensor<string, []> aw_895_equation_0 = const()[name = tensor<string, []>("aw_895_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_895_cast_fp16 = einsum(equation = aw_895_equation_0, values = (var_6116_cast_fp16_7, var_6094_cast_fp16_7))[name = tensor<string, []>("aw_895_cast_fp16")];
+            tensor<string, []> aw_897_equation_0 = const()[name = tensor<string, []>("aw_897_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_897_cast_fp16 = einsum(equation = aw_897_equation_0, values = (var_6116_cast_fp16_8, var_6094_cast_fp16_8))[name = tensor<string, []>("aw_897_cast_fp16")];
+            tensor<string, []> aw_899_equation_0 = const()[name = tensor<string, []>("aw_899_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_899_cast_fp16 = einsum(equation = aw_899_equation_0, values = (var_6116_cast_fp16_9, var_6094_cast_fp16_9))[name = tensor<string, []>("aw_899_cast_fp16")];
+            tensor<string, []> aw_901_equation_0 = const()[name = tensor<string, []>("aw_901_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_901_cast_fp16 = einsum(equation = aw_901_equation_0, values = (var_6116_cast_fp16_10, var_6094_cast_fp16_10))[name = tensor<string, []>("aw_901_cast_fp16")];
+            tensor<string, []> aw_903_equation_0 = const()[name = tensor<string, []>("aw_903_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_903_cast_fp16 = einsum(equation = aw_903_equation_0, values = (var_6116_cast_fp16_11, var_6094_cast_fp16_11))[name = tensor<string, []>("aw_903_cast_fp16")];
+            tensor<string, []> aw_905_equation_0 = const()[name = tensor<string, []>("aw_905_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_905_cast_fp16 = einsum(equation = aw_905_equation_0, values = (var_6116_cast_fp16_12, var_6094_cast_fp16_12))[name = tensor<string, []>("aw_905_cast_fp16")];
+            tensor<string, []> aw_907_equation_0 = const()[name = tensor<string, []>("aw_907_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_907_cast_fp16 = einsum(equation = aw_907_equation_0, values = (var_6116_cast_fp16_13, var_6094_cast_fp16_13))[name = tensor<string, []>("aw_907_cast_fp16")];
+            tensor<string, []> aw_909_equation_0 = const()[name = tensor<string, []>("aw_909_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_909_cast_fp16 = einsum(equation = aw_909_equation_0, values = (var_6116_cast_fp16_14, var_6094_cast_fp16_14))[name = tensor<string, []>("aw_909_cast_fp16")];
+            tensor<string, []> aw_911_equation_0 = const()[name = tensor<string, []>("aw_911_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_911_cast_fp16 = einsum(equation = aw_911_equation_0, values = (var_6116_cast_fp16_15, var_6094_cast_fp16_15))[name = tensor<string, []>("aw_911_cast_fp16")];
+            tensor<string, []> aw_913_equation_0 = const()[name = tensor<string, []>("aw_913_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_913_cast_fp16 = einsum(equation = aw_913_equation_0, values = (var_6116_cast_fp16_16, var_6094_cast_fp16_16))[name = tensor<string, []>("aw_913_cast_fp16")];
+            tensor<string, []> aw_915_equation_0 = const()[name = tensor<string, []>("aw_915_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_915_cast_fp16 = einsum(equation = aw_915_equation_0, values = (var_6116_cast_fp16_17, var_6094_cast_fp16_17))[name = tensor<string, []>("aw_915_cast_fp16")];
+            tensor<string, []> aw_917_equation_0 = const()[name = tensor<string, []>("aw_917_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_917_cast_fp16 = einsum(equation = aw_917_equation_0, values = (var_6116_cast_fp16_18, var_6094_cast_fp16_18))[name = tensor<string, []>("aw_917_cast_fp16")];
+            tensor<string, []> aw_919_equation_0 = const()[name = tensor<string, []>("aw_919_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_919_cast_fp16 = einsum(equation = aw_919_equation_0, values = (var_6116_cast_fp16_19, var_6094_cast_fp16_19))[name = tensor<string, []>("aw_919_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6198_cast_fp16 = softmax(axis = var_6042, x = aw_881_cast_fp16)[name = tensor<string, []>("op_6198_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6199_cast_fp16 = softmax(axis = var_6042, x = aw_883_cast_fp16)[name = tensor<string, []>("op_6199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6200_cast_fp16 = softmax(axis = var_6042, x = aw_885_cast_fp16)[name = tensor<string, []>("op_6200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6201_cast_fp16 = softmax(axis = var_6042, x = aw_887_cast_fp16)[name = tensor<string, []>("op_6201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6202_cast_fp16 = softmax(axis = var_6042, x = aw_889_cast_fp16)[name = tensor<string, []>("op_6202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6203_cast_fp16 = softmax(axis = var_6042, x = aw_891_cast_fp16)[name = tensor<string, []>("op_6203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6204_cast_fp16 = softmax(axis = var_6042, x = aw_893_cast_fp16)[name = tensor<string, []>("op_6204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6205_cast_fp16 = softmax(axis = var_6042, x = aw_895_cast_fp16)[name = tensor<string, []>("op_6205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6206_cast_fp16 = softmax(axis = var_6042, x = aw_897_cast_fp16)[name = tensor<string, []>("op_6206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6207_cast_fp16 = softmax(axis = var_6042, x = aw_899_cast_fp16)[name = tensor<string, []>("op_6207_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6208_cast_fp16 = softmax(axis = var_6042, x = aw_901_cast_fp16)[name = tensor<string, []>("op_6208_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6209_cast_fp16 = softmax(axis = var_6042, x = aw_903_cast_fp16)[name = tensor<string, []>("op_6209_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6210_cast_fp16 = softmax(axis = var_6042, x = aw_905_cast_fp16)[name = tensor<string, []>("op_6210_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6211_cast_fp16 = softmax(axis = var_6042, x = aw_907_cast_fp16)[name = tensor<string, []>("op_6211_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6212_cast_fp16 = softmax(axis = var_6042, x = aw_909_cast_fp16)[name = tensor<string, []>("op_6212_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6213_cast_fp16 = softmax(axis = var_6042, x = aw_911_cast_fp16)[name = tensor<string, []>("op_6213_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6214_cast_fp16 = softmax(axis = var_6042, x = aw_913_cast_fp16)[name = tensor<string, []>("op_6214_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6215_cast_fp16 = softmax(axis = var_6042, x = aw_915_cast_fp16)[name = tensor<string, []>("op_6215_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6216_cast_fp16 = softmax(axis = var_6042, x = aw_917_cast_fp16)[name = tensor<string, []>("op_6216_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6217_cast_fp16 = softmax(axis = var_6042, x = aw_919_cast_fp16)[name = tensor<string, []>("op_6217_cast_fp16")];
+            tensor<string, []> var_6219_equation_0 = const()[name = tensor<string, []>("op_6219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6219_cast_fp16 = einsum(equation = var_6219_equation_0, values = (var_6137_cast_fp16_0, var_6198_cast_fp16))[name = tensor<string, []>("op_6219_cast_fp16")];
+            tensor<string, []> var_6221_equation_0 = const()[name = tensor<string, []>("op_6221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6221_cast_fp16 = einsum(equation = var_6221_equation_0, values = (var_6137_cast_fp16_1, var_6199_cast_fp16))[name = tensor<string, []>("op_6221_cast_fp16")];
+            tensor<string, []> var_6223_equation_0 = const()[name = tensor<string, []>("op_6223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6223_cast_fp16 = einsum(equation = var_6223_equation_0, values = (var_6137_cast_fp16_2, var_6200_cast_fp16))[name = tensor<string, []>("op_6223_cast_fp16")];
+            tensor<string, []> var_6225_equation_0 = const()[name = tensor<string, []>("op_6225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6225_cast_fp16 = einsum(equation = var_6225_equation_0, values = (var_6137_cast_fp16_3, var_6201_cast_fp16))[name = tensor<string, []>("op_6225_cast_fp16")];
+            tensor<string, []> var_6227_equation_0 = const()[name = tensor<string, []>("op_6227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6227_cast_fp16 = einsum(equation = var_6227_equation_0, values = (var_6137_cast_fp16_4, var_6202_cast_fp16))[name = tensor<string, []>("op_6227_cast_fp16")];
+            tensor<string, []> var_6229_equation_0 = const()[name = tensor<string, []>("op_6229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6229_cast_fp16 = einsum(equation = var_6229_equation_0, values = (var_6137_cast_fp16_5, var_6203_cast_fp16))[name = tensor<string, []>("op_6229_cast_fp16")];
+            tensor<string, []> var_6231_equation_0 = const()[name = tensor<string, []>("op_6231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6231_cast_fp16 = einsum(equation = var_6231_equation_0, values = (var_6137_cast_fp16_6, var_6204_cast_fp16))[name = tensor<string, []>("op_6231_cast_fp16")];
+            tensor<string, []> var_6233_equation_0 = const()[name = tensor<string, []>("op_6233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6233_cast_fp16 = einsum(equation = var_6233_equation_0, values = (var_6137_cast_fp16_7, var_6205_cast_fp16))[name = tensor<string, []>("op_6233_cast_fp16")];
+            tensor<string, []> var_6235_equation_0 = const()[name = tensor<string, []>("op_6235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6235_cast_fp16 = einsum(equation = var_6235_equation_0, values = (var_6137_cast_fp16_8, var_6206_cast_fp16))[name = tensor<string, []>("op_6235_cast_fp16")];
+            tensor<string, []> var_6237_equation_0 = const()[name = tensor<string, []>("op_6237_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6237_cast_fp16 = einsum(equation = var_6237_equation_0, values = (var_6137_cast_fp16_9, var_6207_cast_fp16))[name = tensor<string, []>("op_6237_cast_fp16")];
+            tensor<string, []> var_6239_equation_0 = const()[name = tensor<string, []>("op_6239_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6239_cast_fp16 = einsum(equation = var_6239_equation_0, values = (var_6137_cast_fp16_10, var_6208_cast_fp16))[name = tensor<string, []>("op_6239_cast_fp16")];
+            tensor<string, []> var_6241_equation_0 = const()[name = tensor<string, []>("op_6241_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6241_cast_fp16 = einsum(equation = var_6241_equation_0, values = (var_6137_cast_fp16_11, var_6209_cast_fp16))[name = tensor<string, []>("op_6241_cast_fp16")];
+            tensor<string, []> var_6243_equation_0 = const()[name = tensor<string, []>("op_6243_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6243_cast_fp16 = einsum(equation = var_6243_equation_0, values = (var_6137_cast_fp16_12, var_6210_cast_fp16))[name = tensor<string, []>("op_6243_cast_fp16")];
+            tensor<string, []> var_6245_equation_0 = const()[name = tensor<string, []>("op_6245_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6245_cast_fp16 = einsum(equation = var_6245_equation_0, values = (var_6137_cast_fp16_13, var_6211_cast_fp16))[name = tensor<string, []>("op_6245_cast_fp16")];
+            tensor<string, []> var_6247_equation_0 = const()[name = tensor<string, []>("op_6247_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6247_cast_fp16 = einsum(equation = var_6247_equation_0, values = (var_6137_cast_fp16_14, var_6212_cast_fp16))[name = tensor<string, []>("op_6247_cast_fp16")];
+            tensor<string, []> var_6249_equation_0 = const()[name = tensor<string, []>("op_6249_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6249_cast_fp16 = einsum(equation = var_6249_equation_0, values = (var_6137_cast_fp16_15, var_6213_cast_fp16))[name = tensor<string, []>("op_6249_cast_fp16")];
+            tensor<string, []> var_6251_equation_0 = const()[name = tensor<string, []>("op_6251_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6251_cast_fp16 = einsum(equation = var_6251_equation_0, values = (var_6137_cast_fp16_16, var_6214_cast_fp16))[name = tensor<string, []>("op_6251_cast_fp16")];
+            tensor<string, []> var_6253_equation_0 = const()[name = tensor<string, []>("op_6253_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6253_cast_fp16 = einsum(equation = var_6253_equation_0, values = (var_6137_cast_fp16_17, var_6215_cast_fp16))[name = tensor<string, []>("op_6253_cast_fp16")];
+            tensor<string, []> var_6255_equation_0 = const()[name = tensor<string, []>("op_6255_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6255_cast_fp16 = einsum(equation = var_6255_equation_0, values = (var_6137_cast_fp16_18, var_6216_cast_fp16))[name = tensor<string, []>("op_6255_cast_fp16")];
+            tensor<string, []> var_6257_equation_0 = const()[name = tensor<string, []>("op_6257_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6257_cast_fp16 = einsum(equation = var_6257_equation_0, values = (var_6137_cast_fp16_19, var_6217_cast_fp16))[name = tensor<string, []>("op_6257_cast_fp16")];
+            tensor<bool, []> input_225_interleave_0 = const()[name = tensor<string, []>("input_225_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_225_cast_fp16 = concat(axis = var_6042, interleave = input_225_interleave_0, values = (var_6219_cast_fp16, var_6221_cast_fp16, var_6223_cast_fp16, var_6225_cast_fp16, var_6227_cast_fp16, var_6229_cast_fp16, var_6231_cast_fp16, var_6233_cast_fp16, var_6235_cast_fp16, var_6237_cast_fp16, var_6239_cast_fp16, var_6241_cast_fp16, var_6243_cast_fp16, var_6245_cast_fp16, var_6247_cast_fp16, var_6249_cast_fp16, var_6251_cast_fp16, var_6253_cast_fp16, var_6255_cast_fp16, var_6257_cast_fp16))[name = tensor<string, []>("input_225_cast_fp16")];
+            tensor<string, []> var_6266_pad_type_0 = const()[name = tensor<string, []>("op_6266_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6266_strides_0 = const()[name = tensor<string, []>("op_6266_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6266_pad_0 = const()[name = tensor<string, []>("op_6266_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6266_dilations_0 = const()[name = tensor<string, []>("op_6266_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6266_groups_0 = const()[name = tensor<string, []>("op_6266_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(889903552)))];
+            tensor<fp16, [1280]> blocks_22_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893180416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6266_cast_fp16 = conv(bias = blocks_22_attn_out_bias_to_fp16, dilations = var_6266_dilations_0, groups = var_6266_groups_0, pad = var_6266_pad_0, pad_type = var_6266_pad_type_0, strides = var_6266_strides_0, weight = blocks_22_attn_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("op_6266_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = var_6266_cast_fp16)[name = tensor<string, []>("inputs_91_cast_fp16")];
+            tensor<int32, [1]> input_227_axes_0 = const()[name = tensor<string, []>("input_227_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_227_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893183040)))];
+            tensor<fp16, [1280]> input_227_beta_0_to_fp16 = const()[name = tensor<string, []>("input_227_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893185664)))];
+            tensor<fp16, []> var_6276_to_fp16 = const()[name = tensor<string, []>("op_6276_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = input_227_beta_0_to_fp16, epsilon = var_6276_to_fp16, gamma = input_227_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<string, []> input_229_pad_type_0 = const()[name = tensor<string, []>("input_229_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_229_strides_0 = const()[name = tensor<string, []>("input_229_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_229_pad_0 = const()[name = tensor<string, []>("input_229_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_229_dilations_0 = const()[name = tensor<string, []>("input_229_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_229_groups_0 = const()[name = tensor<string, []>("input_229_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_22_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893188288)))];
+            tensor<fp16, [5120]> blocks_22_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906295552)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_229_cast_fp16 = conv(bias = blocks_22_mlp_0_bias_to_fp16, dilations = input_229_dilations_0, groups = input_229_groups_0, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = input_229_strides_0, weight = blocks_22_mlp_0_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<string, []> input_231_mode_0 = const()[name = tensor<string, []>("input_231_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<string, []> var_6302_pad_type_0 = const()[name = tensor<string, []>("op_6302_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6302_strides_0 = const()[name = tensor<string, []>("op_6302_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6302_pad_0 = const()[name = tensor<string, []>("op_6302_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6302_dilations_0 = const()[name = tensor<string, []>("op_6302_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6302_groups_0 = const()[name = tensor<string, []>("op_6302_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_22_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906305856)))];
+            tensor<fp16, [1280]> blocks_22_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919413120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6302_cast_fp16 = conv(bias = blocks_22_mlp_2_bias_to_fp16, dilations = var_6302_dilations_0, groups = var_6302_groups_0, pad = var_6302_pad_0, pad_type = var_6302_pad_type_0, strides = var_6302_strides_0, weight = blocks_22_mlp_2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor<string, []>("op_6302_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = var_6302_cast_fp16)[name = tensor<string, []>("inputs_93_cast_fp16")];
+            tensor<int32, []> var_6311 = const()[name = tensor<string, []>("op_6311"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_233_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_233_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919415744)))];
+            tensor<fp16, [1280]> input_233_beta_0_to_fp16 = const()[name = tensor<string, []>("input_233_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919418368)))];
+            tensor<fp16, []> var_6327_to_fp16 = const()[name = tensor<string, []>("op_6327_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = input_233_beta_0_to_fp16, epsilon = var_6327_to_fp16, gamma = input_233_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<string, []> q_47_pad_type_0 = const()[name = tensor<string, []>("q_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_47_strides_0 = const()[name = tensor<string, []>("q_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_47_pad_0 = const()[name = tensor<string, []>("q_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_47_dilations_0 = const()[name = tensor<string, []>("q_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_47_groups_0 = const()[name = tensor<string, []>("q_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6362_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6362_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919420992)))];
+            tensor<fp16, [1280]> var_6362_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6362_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(922697856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6362_cast_fp16 = conv(bias = var_6362_bias_0_to_fp16, dilations = q_47_dilations_0, groups = q_47_groups_0, pad = q_47_pad_0, pad_type = q_47_pad_type_0, strides = q_47_strides_0, weight = var_6362_weight_0_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6362_cast_fp16")];
+            tensor<string, []> k_47_pad_type_0 = const()[name = tensor<string, []>("k_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_47_strides_0 = const()[name = tensor<string, []>("k_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_47_pad_0 = const()[name = tensor<string, []>("k_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_47_dilations_0 = const()[name = tensor<string, []>("k_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_47_groups_0 = const()[name = tensor<string, []>("k_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(922700480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_47_cast_fp16 = conv(dilations = k_47_dilations_0, groups = k_47_groups_0, pad = k_47_pad_0, pad_type = k_47_pad_type_0, strides = k_47_strides_0, weight = blocks_23_attn_key_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("k_47_cast_fp16")];
+            tensor<string, []> var_6360_pad_type_0 = const()[name = tensor<string, []>("op_6360_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6360_strides_0 = const()[name = tensor<string, []>("op_6360_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6360_pad_0 = const()[name = tensor<string, []>("op_6360_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6360_dilations_0 = const()[name = tensor<string, []>("op_6360_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6360_groups_0 = const()[name = tensor<string, []>("op_6360_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(925977344)))];
+            tensor<fp16, [1280]> blocks_23_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929254208)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6360_cast_fp16 = conv(bias = blocks_23_attn_value_bias_to_fp16, dilations = var_6360_dilations_0, groups = var_6360_groups_0, pad = var_6360_pad_0, pad_type = var_6360_pad_type_0, strides = var_6360_strides_0, weight = blocks_23_attn_value_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6360_cast_fp16")];
+            tensor<int32, [20]> tile_69 = const()[name = tensor<string, []>("tile_69"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6363_axis_0 = const()[name = tensor<string, []>("op_6363_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_19 = split(axis = var_6363_axis_0, split_sizes = tile_69, x = var_6362_cast_fp16)[name = tensor<string, []>("op_6363_cast_fp16")];
+            tensor<int32, [4]> var_6384_perm_0 = const()[name = tensor<string, []>("op_6384_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_70 = const()[name = tensor<string, []>("tile_70"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6385_axis_0 = const()[name = tensor<string, []>("op_6385_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6384_cast_fp16 = transpose(perm = var_6384_perm_0, x = k_47_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_19 = split(axis = var_6385_axis_0, split_sizes = tile_70, x = var_6384_cast_fp16)[name = tensor<string, []>("op_6385_cast_fp16")];
+            tensor<int32, [20]> tile_71 = const()[name = tensor<string, []>("tile_71"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6406_axis_0 = const()[name = tensor<string, []>("op_6406_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_19 = split(axis = var_6406_axis_0, split_sizes = tile_71, x = var_6360_cast_fp16)[name = tensor<string, []>("op_6406_cast_fp16")];
+            tensor<string, []> aw_921_equation_0 = const()[name = tensor<string, []>("aw_921_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_921_cast_fp16 = einsum(equation = aw_921_equation_0, values = (var_6385_cast_fp16_0, var_6363_cast_fp16_0))[name = tensor<string, []>("aw_921_cast_fp16")];
+            tensor<string, []> aw_923_equation_0 = const()[name = tensor<string, []>("aw_923_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_923_cast_fp16 = einsum(equation = aw_923_equation_0, values = (var_6385_cast_fp16_1, var_6363_cast_fp16_1))[name = tensor<string, []>("aw_923_cast_fp16")];
+            tensor<string, []> aw_925_equation_0 = const()[name = tensor<string, []>("aw_925_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_925_cast_fp16 = einsum(equation = aw_925_equation_0, values = (var_6385_cast_fp16_2, var_6363_cast_fp16_2))[name = tensor<string, []>("aw_925_cast_fp16")];
+            tensor<string, []> aw_927_equation_0 = const()[name = tensor<string, []>("aw_927_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_927_cast_fp16 = einsum(equation = aw_927_equation_0, values = (var_6385_cast_fp16_3, var_6363_cast_fp16_3))[name = tensor<string, []>("aw_927_cast_fp16")];
+            tensor<string, []> aw_929_equation_0 = const()[name = tensor<string, []>("aw_929_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_929_cast_fp16 = einsum(equation = aw_929_equation_0, values = (var_6385_cast_fp16_4, var_6363_cast_fp16_4))[name = tensor<string, []>("aw_929_cast_fp16")];
+            tensor<string, []> aw_931_equation_0 = const()[name = tensor<string, []>("aw_931_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_931_cast_fp16 = einsum(equation = aw_931_equation_0, values = (var_6385_cast_fp16_5, var_6363_cast_fp16_5))[name = tensor<string, []>("aw_931_cast_fp16")];
+            tensor<string, []> aw_933_equation_0 = const()[name = tensor<string, []>("aw_933_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_933_cast_fp16 = einsum(equation = aw_933_equation_0, values = (var_6385_cast_fp16_6, var_6363_cast_fp16_6))[name = tensor<string, []>("aw_933_cast_fp16")];
+            tensor<string, []> aw_935_equation_0 = const()[name = tensor<string, []>("aw_935_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_935_cast_fp16 = einsum(equation = aw_935_equation_0, values = (var_6385_cast_fp16_7, var_6363_cast_fp16_7))[name = tensor<string, []>("aw_935_cast_fp16")];
+            tensor<string, []> aw_937_equation_0 = const()[name = tensor<string, []>("aw_937_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_937_cast_fp16 = einsum(equation = aw_937_equation_0, values = (var_6385_cast_fp16_8, var_6363_cast_fp16_8))[name = tensor<string, []>("aw_937_cast_fp16")];
+            tensor<string, []> aw_939_equation_0 = const()[name = tensor<string, []>("aw_939_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_939_cast_fp16 = einsum(equation = aw_939_equation_0, values = (var_6385_cast_fp16_9, var_6363_cast_fp16_9))[name = tensor<string, []>("aw_939_cast_fp16")];
+            tensor<string, []> aw_941_equation_0 = const()[name = tensor<string, []>("aw_941_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_941_cast_fp16 = einsum(equation = aw_941_equation_0, values = (var_6385_cast_fp16_10, var_6363_cast_fp16_10))[name = tensor<string, []>("aw_941_cast_fp16")];
+            tensor<string, []> aw_943_equation_0 = const()[name = tensor<string, []>("aw_943_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_943_cast_fp16 = einsum(equation = aw_943_equation_0, values = (var_6385_cast_fp16_11, var_6363_cast_fp16_11))[name = tensor<string, []>("aw_943_cast_fp16")];
+            tensor<string, []> aw_945_equation_0 = const()[name = tensor<string, []>("aw_945_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_945_cast_fp16 = einsum(equation = aw_945_equation_0, values = (var_6385_cast_fp16_12, var_6363_cast_fp16_12))[name = tensor<string, []>("aw_945_cast_fp16")];
+            tensor<string, []> aw_947_equation_0 = const()[name = tensor<string, []>("aw_947_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_947_cast_fp16 = einsum(equation = aw_947_equation_0, values = (var_6385_cast_fp16_13, var_6363_cast_fp16_13))[name = tensor<string, []>("aw_947_cast_fp16")];
+            tensor<string, []> aw_949_equation_0 = const()[name = tensor<string, []>("aw_949_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_949_cast_fp16 = einsum(equation = aw_949_equation_0, values = (var_6385_cast_fp16_14, var_6363_cast_fp16_14))[name = tensor<string, []>("aw_949_cast_fp16")];
+            tensor<string, []> aw_951_equation_0 = const()[name = tensor<string, []>("aw_951_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_951_cast_fp16 = einsum(equation = aw_951_equation_0, values = (var_6385_cast_fp16_15, var_6363_cast_fp16_15))[name = tensor<string, []>("aw_951_cast_fp16")];
+            tensor<string, []> aw_953_equation_0 = const()[name = tensor<string, []>("aw_953_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_953_cast_fp16 = einsum(equation = aw_953_equation_0, values = (var_6385_cast_fp16_16, var_6363_cast_fp16_16))[name = tensor<string, []>("aw_953_cast_fp16")];
+            tensor<string, []> aw_955_equation_0 = const()[name = tensor<string, []>("aw_955_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_955_cast_fp16 = einsum(equation = aw_955_equation_0, values = (var_6385_cast_fp16_17, var_6363_cast_fp16_17))[name = tensor<string, []>("aw_955_cast_fp16")];
+            tensor<string, []> aw_957_equation_0 = const()[name = tensor<string, []>("aw_957_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_957_cast_fp16 = einsum(equation = aw_957_equation_0, values = (var_6385_cast_fp16_18, var_6363_cast_fp16_18))[name = tensor<string, []>("aw_957_cast_fp16")];
+            tensor<string, []> aw_959_equation_0 = const()[name = tensor<string, []>("aw_959_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_959_cast_fp16 = einsum(equation = aw_959_equation_0, values = (var_6385_cast_fp16_19, var_6363_cast_fp16_19))[name = tensor<string, []>("aw_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6467_cast_fp16 = softmax(axis = var_6311, x = aw_921_cast_fp16)[name = tensor<string, []>("op_6467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6468_cast_fp16 = softmax(axis = var_6311, x = aw_923_cast_fp16)[name = tensor<string, []>("op_6468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6469_cast_fp16 = softmax(axis = var_6311, x = aw_925_cast_fp16)[name = tensor<string, []>("op_6469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6470_cast_fp16 = softmax(axis = var_6311, x = aw_927_cast_fp16)[name = tensor<string, []>("op_6470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6471_cast_fp16 = softmax(axis = var_6311, x = aw_929_cast_fp16)[name = tensor<string, []>("op_6471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6472_cast_fp16 = softmax(axis = var_6311, x = aw_931_cast_fp16)[name = tensor<string, []>("op_6472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6473_cast_fp16 = softmax(axis = var_6311, x = aw_933_cast_fp16)[name = tensor<string, []>("op_6473_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6474_cast_fp16 = softmax(axis = var_6311, x = aw_935_cast_fp16)[name = tensor<string, []>("op_6474_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6475_cast_fp16 = softmax(axis = var_6311, x = aw_937_cast_fp16)[name = tensor<string, []>("op_6475_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6476_cast_fp16 = softmax(axis = var_6311, x = aw_939_cast_fp16)[name = tensor<string, []>("op_6476_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6477_cast_fp16 = softmax(axis = var_6311, x = aw_941_cast_fp16)[name = tensor<string, []>("op_6477_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6478_cast_fp16 = softmax(axis = var_6311, x = aw_943_cast_fp16)[name = tensor<string, []>("op_6478_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6479_cast_fp16 = softmax(axis = var_6311, x = aw_945_cast_fp16)[name = tensor<string, []>("op_6479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6480_cast_fp16 = softmax(axis = var_6311, x = aw_947_cast_fp16)[name = tensor<string, []>("op_6480_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6481_cast_fp16 = softmax(axis = var_6311, x = aw_949_cast_fp16)[name = tensor<string, []>("op_6481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6482_cast_fp16 = softmax(axis = var_6311, x = aw_951_cast_fp16)[name = tensor<string, []>("op_6482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6483_cast_fp16 = softmax(axis = var_6311, x = aw_953_cast_fp16)[name = tensor<string, []>("op_6483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6484_cast_fp16 = softmax(axis = var_6311, x = aw_955_cast_fp16)[name = tensor<string, []>("op_6484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6485_cast_fp16 = softmax(axis = var_6311, x = aw_957_cast_fp16)[name = tensor<string, []>("op_6485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6486_cast_fp16 = softmax(axis = var_6311, x = aw_959_cast_fp16)[name = tensor<string, []>("op_6486_cast_fp16")];
+            tensor<string, []> var_6488_equation_0 = const()[name = tensor<string, []>("op_6488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6488_cast_fp16 = einsum(equation = var_6488_equation_0, values = (var_6406_cast_fp16_0, var_6467_cast_fp16))[name = tensor<string, []>("op_6488_cast_fp16")];
+            tensor<string, []> var_6490_equation_0 = const()[name = tensor<string, []>("op_6490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6490_cast_fp16 = einsum(equation = var_6490_equation_0, values = (var_6406_cast_fp16_1, var_6468_cast_fp16))[name = tensor<string, []>("op_6490_cast_fp16")];
+            tensor<string, []> var_6492_equation_0 = const()[name = tensor<string, []>("op_6492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6492_cast_fp16 = einsum(equation = var_6492_equation_0, values = (var_6406_cast_fp16_2, var_6469_cast_fp16))[name = tensor<string, []>("op_6492_cast_fp16")];
+            tensor<string, []> var_6494_equation_0 = const()[name = tensor<string, []>("op_6494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6494_cast_fp16 = einsum(equation = var_6494_equation_0, values = (var_6406_cast_fp16_3, var_6470_cast_fp16))[name = tensor<string, []>("op_6494_cast_fp16")];
+            tensor<string, []> var_6496_equation_0 = const()[name = tensor<string, []>("op_6496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6496_cast_fp16 = einsum(equation = var_6496_equation_0, values = (var_6406_cast_fp16_4, var_6471_cast_fp16))[name = tensor<string, []>("op_6496_cast_fp16")];
+            tensor<string, []> var_6498_equation_0 = const()[name = tensor<string, []>("op_6498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6498_cast_fp16 = einsum(equation = var_6498_equation_0, values = (var_6406_cast_fp16_5, var_6472_cast_fp16))[name = tensor<string, []>("op_6498_cast_fp16")];
+            tensor<string, []> var_6500_equation_0 = const()[name = tensor<string, []>("op_6500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6500_cast_fp16 = einsum(equation = var_6500_equation_0, values = (var_6406_cast_fp16_6, var_6473_cast_fp16))[name = tensor<string, []>("op_6500_cast_fp16")];
+            tensor<string, []> var_6502_equation_0 = const()[name = tensor<string, []>("op_6502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6502_cast_fp16 = einsum(equation = var_6502_equation_0, values = (var_6406_cast_fp16_7, var_6474_cast_fp16))[name = tensor<string, []>("op_6502_cast_fp16")];
+            tensor<string, []> var_6504_equation_0 = const()[name = tensor<string, []>("op_6504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6504_cast_fp16 = einsum(equation = var_6504_equation_0, values = (var_6406_cast_fp16_8, var_6475_cast_fp16))[name = tensor<string, []>("op_6504_cast_fp16")];
+            tensor<string, []> var_6506_equation_0 = const()[name = tensor<string, []>("op_6506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6506_cast_fp16 = einsum(equation = var_6506_equation_0, values = (var_6406_cast_fp16_9, var_6476_cast_fp16))[name = tensor<string, []>("op_6506_cast_fp16")];
+            tensor<string, []> var_6508_equation_0 = const()[name = tensor<string, []>("op_6508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6508_cast_fp16 = einsum(equation = var_6508_equation_0, values = (var_6406_cast_fp16_10, var_6477_cast_fp16))[name = tensor<string, []>("op_6508_cast_fp16")];
+            tensor<string, []> var_6510_equation_0 = const()[name = tensor<string, []>("op_6510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6510_cast_fp16 = einsum(equation = var_6510_equation_0, values = (var_6406_cast_fp16_11, var_6478_cast_fp16))[name = tensor<string, []>("op_6510_cast_fp16")];
+            tensor<string, []> var_6512_equation_0 = const()[name = tensor<string, []>("op_6512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6512_cast_fp16 = einsum(equation = var_6512_equation_0, values = (var_6406_cast_fp16_12, var_6479_cast_fp16))[name = tensor<string, []>("op_6512_cast_fp16")];
+            tensor<string, []> var_6514_equation_0 = const()[name = tensor<string, []>("op_6514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6514_cast_fp16 = einsum(equation = var_6514_equation_0, values = (var_6406_cast_fp16_13, var_6480_cast_fp16))[name = tensor<string, []>("op_6514_cast_fp16")];
+            tensor<string, []> var_6516_equation_0 = const()[name = tensor<string, []>("op_6516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6516_cast_fp16 = einsum(equation = var_6516_equation_0, values = (var_6406_cast_fp16_14, var_6481_cast_fp16))[name = tensor<string, []>("op_6516_cast_fp16")];
+            tensor<string, []> var_6518_equation_0 = const()[name = tensor<string, []>("op_6518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6518_cast_fp16 = einsum(equation = var_6518_equation_0, values = (var_6406_cast_fp16_15, var_6482_cast_fp16))[name = tensor<string, []>("op_6518_cast_fp16")];
+            tensor<string, []> var_6520_equation_0 = const()[name = tensor<string, []>("op_6520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6520_cast_fp16 = einsum(equation = var_6520_equation_0, values = (var_6406_cast_fp16_16, var_6483_cast_fp16))[name = tensor<string, []>("op_6520_cast_fp16")];
+            tensor<string, []> var_6522_equation_0 = const()[name = tensor<string, []>("op_6522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6522_cast_fp16 = einsum(equation = var_6522_equation_0, values = (var_6406_cast_fp16_17, var_6484_cast_fp16))[name = tensor<string, []>("op_6522_cast_fp16")];
+            tensor<string, []> var_6524_equation_0 = const()[name = tensor<string, []>("op_6524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6524_cast_fp16 = einsum(equation = var_6524_equation_0, values = (var_6406_cast_fp16_18, var_6485_cast_fp16))[name = tensor<string, []>("op_6524_cast_fp16")];
+            tensor<string, []> var_6526_equation_0 = const()[name = tensor<string, []>("op_6526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6526_cast_fp16 = einsum(equation = var_6526_equation_0, values = (var_6406_cast_fp16_19, var_6486_cast_fp16))[name = tensor<string, []>("op_6526_cast_fp16")];
+            tensor<bool, []> input_235_interleave_0 = const()[name = tensor<string, []>("input_235_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_235_cast_fp16 = concat(axis = var_6311, interleave = input_235_interleave_0, values = (var_6488_cast_fp16, var_6490_cast_fp16, var_6492_cast_fp16, var_6494_cast_fp16, var_6496_cast_fp16, var_6498_cast_fp16, var_6500_cast_fp16, var_6502_cast_fp16, var_6504_cast_fp16, var_6506_cast_fp16, var_6508_cast_fp16, var_6510_cast_fp16, var_6512_cast_fp16, var_6514_cast_fp16, var_6516_cast_fp16, var_6518_cast_fp16, var_6520_cast_fp16, var_6522_cast_fp16, var_6524_cast_fp16, var_6526_cast_fp16))[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<string, []> var_6535_pad_type_0 = const()[name = tensor<string, []>("op_6535_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6535_strides_0 = const()[name = tensor<string, []>("op_6535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6535_pad_0 = const()[name = tensor<string, []>("op_6535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6535_dilations_0 = const()[name = tensor<string, []>("op_6535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6535_groups_0 = const()[name = tensor<string, []>("op_6535_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929256832)))];
+            tensor<fp16, [1280]> blocks_23_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932533696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6535_cast_fp16 = conv(bias = blocks_23_attn_out_bias_to_fp16, dilations = var_6535_dilations_0, groups = var_6535_groups_0, pad = var_6535_pad_0, pad_type = var_6535_pad_type_0, strides = var_6535_strides_0, weight = blocks_23_attn_out_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("op_6535_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = var_6535_cast_fp16)[name = tensor<string, []>("inputs_95_cast_fp16")];
+            tensor<int32, [1]> input_237_axes_0 = const()[name = tensor<string, []>("input_237_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_237_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_237_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932536320)))];
+            tensor<fp16, [1280]> input_237_beta_0_to_fp16 = const()[name = tensor<string, []>("input_237_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932538944)))];
+            tensor<fp16, []> var_6545_to_fp16 = const()[name = tensor<string, []>("op_6545_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = input_237_beta_0_to_fp16, epsilon = var_6545_to_fp16, gamma = input_237_gamma_0_to_fp16, x = inputs_95_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_23_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932541568)))];
+            tensor<fp16, [5120]> blocks_23_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(945648832)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_239_cast_fp16 = conv(bias = blocks_23_mlp_0_bias_to_fp16, dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = blocks_23_mlp_0_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<string, []> input_241_mode_0 = const()[name = tensor<string, []>("input_241_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_241_cast_fp16 = gelu(mode = input_241_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
+            tensor<string, []> var_6571_pad_type_0 = const()[name = tensor<string, []>("op_6571_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6571_strides_0 = const()[name = tensor<string, []>("op_6571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6571_pad_0 = const()[name = tensor<string, []>("op_6571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6571_dilations_0 = const()[name = tensor<string, []>("op_6571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6571_groups_0 = const()[name = tensor<string, []>("op_6571_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_23_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(945659136)))];
+            tensor<fp16, [1280]> blocks_23_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958766400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6571_cast_fp16 = conv(bias = blocks_23_mlp_2_bias_to_fp16, dilations = var_6571_dilations_0, groups = var_6571_groups_0, pad = var_6571_pad_0, pad_type = var_6571_pad_type_0, strides = var_6571_strides_0, weight = blocks_23_mlp_2_weight_to_fp16, x = input_241_cast_fp16)[name = tensor<string, []>("op_6571_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_6571_cast_fp16)[name = tensor<string, []>("inputs_97_cast_fp16")];
+            tensor<int32, []> var_6580 = const()[name = tensor<string, []>("op_6580"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_243_axes_0 = const()[name = tensor<string, []>("input_243_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_243_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_243_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958769024)))];
+            tensor<fp16, [1280]> input_243_beta_0_to_fp16 = const()[name = tensor<string, []>("input_243_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958771648)))];
+            tensor<fp16, []> var_6596_to_fp16 = const()[name = tensor<string, []>("op_6596_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_243_cast_fp16 = layer_norm(axes = input_243_axes_0, beta = input_243_beta_0_to_fp16, epsilon = var_6596_to_fp16, gamma = input_243_gamma_0_to_fp16, x = inputs_97_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
+            tensor<string, []> q_49_pad_type_0 = const()[name = tensor<string, []>("q_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_49_strides_0 = const()[name = tensor<string, []>("q_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_49_pad_0 = const()[name = tensor<string, []>("q_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_49_dilations_0 = const()[name = tensor<string, []>("q_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_49_groups_0 = const()[name = tensor<string, []>("q_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6631_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6631_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(958774272)))];
+            tensor<fp16, [1280]> var_6631_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6631_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962051136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6631_cast_fp16 = conv(bias = var_6631_bias_0_to_fp16, dilations = q_49_dilations_0, groups = q_49_groups_0, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = q_49_strides_0, weight = var_6631_weight_0_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6631_cast_fp16")];
+            tensor<string, []> k_49_pad_type_0 = const()[name = tensor<string, []>("k_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_49_strides_0 = const()[name = tensor<string, []>("k_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_49_pad_0 = const()[name = tensor<string, []>("k_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_49_dilations_0 = const()[name = tensor<string, []>("k_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_49_groups_0 = const()[name = tensor<string, []>("k_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962053760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_49_cast_fp16 = conv(dilations = k_49_dilations_0, groups = k_49_groups_0, pad = k_49_pad_0, pad_type = k_49_pad_type_0, strides = k_49_strides_0, weight = blocks_24_attn_key_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("k_49_cast_fp16")];
+            tensor<string, []> var_6629_pad_type_0 = const()[name = tensor<string, []>("op_6629_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6629_strides_0 = const()[name = tensor<string, []>("op_6629_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6629_pad_0 = const()[name = tensor<string, []>("op_6629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6629_dilations_0 = const()[name = tensor<string, []>("op_6629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6629_groups_0 = const()[name = tensor<string, []>("op_6629_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(965330624)))];
+            tensor<fp16, [1280]> blocks_24_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968607488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6629_cast_fp16 = conv(bias = blocks_24_attn_value_bias_to_fp16, dilations = var_6629_dilations_0, groups = var_6629_groups_0, pad = var_6629_pad_0, pad_type = var_6629_pad_type_0, strides = var_6629_strides_0, weight = blocks_24_attn_value_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6629_cast_fp16")];
+            tensor<int32, [20]> tile_72 = const()[name = tensor<string, []>("tile_72"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6632_axis_0 = const()[name = tensor<string, []>("op_6632_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_19 = split(axis = var_6632_axis_0, split_sizes = tile_72, x = var_6631_cast_fp16)[name = tensor<string, []>("op_6632_cast_fp16")];
+            tensor<int32, [4]> var_6653_perm_0 = const()[name = tensor<string, []>("op_6653_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_73 = const()[name = tensor<string, []>("tile_73"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6654_axis_0 = const()[name = tensor<string, []>("op_6654_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6653_cast_fp16 = transpose(perm = var_6653_perm_0, x = k_49_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_19 = split(axis = var_6654_axis_0, split_sizes = tile_73, x = var_6653_cast_fp16)[name = tensor<string, []>("op_6654_cast_fp16")];
+            tensor<int32, [20]> tile_74 = const()[name = tensor<string, []>("tile_74"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6675_axis_0 = const()[name = tensor<string, []>("op_6675_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_19 = split(axis = var_6675_axis_0, split_sizes = tile_74, x = var_6629_cast_fp16)[name = tensor<string, []>("op_6675_cast_fp16")];
+            tensor<string, []> aw_961_equation_0 = const()[name = tensor<string, []>("aw_961_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_961_cast_fp16 = einsum(equation = aw_961_equation_0, values = (var_6654_cast_fp16_0, var_6632_cast_fp16_0))[name = tensor<string, []>("aw_961_cast_fp16")];
+            tensor<string, []> aw_963_equation_0 = const()[name = tensor<string, []>("aw_963_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_963_cast_fp16 = einsum(equation = aw_963_equation_0, values = (var_6654_cast_fp16_1, var_6632_cast_fp16_1))[name = tensor<string, []>("aw_963_cast_fp16")];
+            tensor<string, []> aw_965_equation_0 = const()[name = tensor<string, []>("aw_965_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_965_cast_fp16 = einsum(equation = aw_965_equation_0, values = (var_6654_cast_fp16_2, var_6632_cast_fp16_2))[name = tensor<string, []>("aw_965_cast_fp16")];
+            tensor<string, []> aw_967_equation_0 = const()[name = tensor<string, []>("aw_967_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_967_cast_fp16 = einsum(equation = aw_967_equation_0, values = (var_6654_cast_fp16_3, var_6632_cast_fp16_3))[name = tensor<string, []>("aw_967_cast_fp16")];
+            tensor<string, []> aw_969_equation_0 = const()[name = tensor<string, []>("aw_969_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_969_cast_fp16 = einsum(equation = aw_969_equation_0, values = (var_6654_cast_fp16_4, var_6632_cast_fp16_4))[name = tensor<string, []>("aw_969_cast_fp16")];
+            tensor<string, []> aw_971_equation_0 = const()[name = tensor<string, []>("aw_971_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_971_cast_fp16 = einsum(equation = aw_971_equation_0, values = (var_6654_cast_fp16_5, var_6632_cast_fp16_5))[name = tensor<string, []>("aw_971_cast_fp16")];
+            tensor<string, []> aw_973_equation_0 = const()[name = tensor<string, []>("aw_973_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_973_cast_fp16 = einsum(equation = aw_973_equation_0, values = (var_6654_cast_fp16_6, var_6632_cast_fp16_6))[name = tensor<string, []>("aw_973_cast_fp16")];
+            tensor<string, []> aw_975_equation_0 = const()[name = tensor<string, []>("aw_975_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_975_cast_fp16 = einsum(equation = aw_975_equation_0, values = (var_6654_cast_fp16_7, var_6632_cast_fp16_7))[name = tensor<string, []>("aw_975_cast_fp16")];
+            tensor<string, []> aw_977_equation_0 = const()[name = tensor<string, []>("aw_977_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_977_cast_fp16 = einsum(equation = aw_977_equation_0, values = (var_6654_cast_fp16_8, var_6632_cast_fp16_8))[name = tensor<string, []>("aw_977_cast_fp16")];
+            tensor<string, []> aw_979_equation_0 = const()[name = tensor<string, []>("aw_979_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_979_cast_fp16 = einsum(equation = aw_979_equation_0, values = (var_6654_cast_fp16_9, var_6632_cast_fp16_9))[name = tensor<string, []>("aw_979_cast_fp16")];
+            tensor<string, []> aw_981_equation_0 = const()[name = tensor<string, []>("aw_981_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_981_cast_fp16 = einsum(equation = aw_981_equation_0, values = (var_6654_cast_fp16_10, var_6632_cast_fp16_10))[name = tensor<string, []>("aw_981_cast_fp16")];
+            tensor<string, []> aw_983_equation_0 = const()[name = tensor<string, []>("aw_983_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_983_cast_fp16 = einsum(equation = aw_983_equation_0, values = (var_6654_cast_fp16_11, var_6632_cast_fp16_11))[name = tensor<string, []>("aw_983_cast_fp16")];
+            tensor<string, []> aw_985_equation_0 = const()[name = tensor<string, []>("aw_985_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_985_cast_fp16 = einsum(equation = aw_985_equation_0, values = (var_6654_cast_fp16_12, var_6632_cast_fp16_12))[name = tensor<string, []>("aw_985_cast_fp16")];
+            tensor<string, []> aw_987_equation_0 = const()[name = tensor<string, []>("aw_987_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_987_cast_fp16 = einsum(equation = aw_987_equation_0, values = (var_6654_cast_fp16_13, var_6632_cast_fp16_13))[name = tensor<string, []>("aw_987_cast_fp16")];
+            tensor<string, []> aw_989_equation_0 = const()[name = tensor<string, []>("aw_989_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_989_cast_fp16 = einsum(equation = aw_989_equation_0, values = (var_6654_cast_fp16_14, var_6632_cast_fp16_14))[name = tensor<string, []>("aw_989_cast_fp16")];
+            tensor<string, []> aw_991_equation_0 = const()[name = tensor<string, []>("aw_991_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_991_cast_fp16 = einsum(equation = aw_991_equation_0, values = (var_6654_cast_fp16_15, var_6632_cast_fp16_15))[name = tensor<string, []>("aw_991_cast_fp16")];
+            tensor<string, []> aw_993_equation_0 = const()[name = tensor<string, []>("aw_993_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_993_cast_fp16 = einsum(equation = aw_993_equation_0, values = (var_6654_cast_fp16_16, var_6632_cast_fp16_16))[name = tensor<string, []>("aw_993_cast_fp16")];
+            tensor<string, []> aw_995_equation_0 = const()[name = tensor<string, []>("aw_995_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_995_cast_fp16 = einsum(equation = aw_995_equation_0, values = (var_6654_cast_fp16_17, var_6632_cast_fp16_17))[name = tensor<string, []>("aw_995_cast_fp16")];
+            tensor<string, []> aw_997_equation_0 = const()[name = tensor<string, []>("aw_997_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_997_cast_fp16 = einsum(equation = aw_997_equation_0, values = (var_6654_cast_fp16_18, var_6632_cast_fp16_18))[name = tensor<string, []>("aw_997_cast_fp16")];
+            tensor<string, []> aw_999_equation_0 = const()[name = tensor<string, []>("aw_999_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_999_cast_fp16 = einsum(equation = aw_999_equation_0, values = (var_6654_cast_fp16_19, var_6632_cast_fp16_19))[name = tensor<string, []>("aw_999_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6736_cast_fp16 = softmax(axis = var_6580, x = aw_961_cast_fp16)[name = tensor<string, []>("op_6736_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6737_cast_fp16 = softmax(axis = var_6580, x = aw_963_cast_fp16)[name = tensor<string, []>("op_6737_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6738_cast_fp16 = softmax(axis = var_6580, x = aw_965_cast_fp16)[name = tensor<string, []>("op_6738_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6739_cast_fp16 = softmax(axis = var_6580, x = aw_967_cast_fp16)[name = tensor<string, []>("op_6739_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6740_cast_fp16 = softmax(axis = var_6580, x = aw_969_cast_fp16)[name = tensor<string, []>("op_6740_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6741_cast_fp16 = softmax(axis = var_6580, x = aw_971_cast_fp16)[name = tensor<string, []>("op_6741_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6742_cast_fp16 = softmax(axis = var_6580, x = aw_973_cast_fp16)[name = tensor<string, []>("op_6742_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6743_cast_fp16 = softmax(axis = var_6580, x = aw_975_cast_fp16)[name = tensor<string, []>("op_6743_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6744_cast_fp16 = softmax(axis = var_6580, x = aw_977_cast_fp16)[name = tensor<string, []>("op_6744_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6745_cast_fp16 = softmax(axis = var_6580, x = aw_979_cast_fp16)[name = tensor<string, []>("op_6745_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6746_cast_fp16 = softmax(axis = var_6580, x = aw_981_cast_fp16)[name = tensor<string, []>("op_6746_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6747_cast_fp16 = softmax(axis = var_6580, x = aw_983_cast_fp16)[name = tensor<string, []>("op_6747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6748_cast_fp16 = softmax(axis = var_6580, x = aw_985_cast_fp16)[name = tensor<string, []>("op_6748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6749_cast_fp16 = softmax(axis = var_6580, x = aw_987_cast_fp16)[name = tensor<string, []>("op_6749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6750_cast_fp16 = softmax(axis = var_6580, x = aw_989_cast_fp16)[name = tensor<string, []>("op_6750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6751_cast_fp16 = softmax(axis = var_6580, x = aw_991_cast_fp16)[name = tensor<string, []>("op_6751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6752_cast_fp16 = softmax(axis = var_6580, x = aw_993_cast_fp16)[name = tensor<string, []>("op_6752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6753_cast_fp16 = softmax(axis = var_6580, x = aw_995_cast_fp16)[name = tensor<string, []>("op_6753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6754_cast_fp16 = softmax(axis = var_6580, x = aw_997_cast_fp16)[name = tensor<string, []>("op_6754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6755_cast_fp16 = softmax(axis = var_6580, x = aw_999_cast_fp16)[name = tensor<string, []>("op_6755_cast_fp16")];
+            tensor<string, []> var_6757_equation_0 = const()[name = tensor<string, []>("op_6757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6675_cast_fp16_0, var_6736_cast_fp16))[name = tensor<string, []>("op_6757_cast_fp16")];
+            tensor<string, []> var_6759_equation_0 = const()[name = tensor<string, []>("op_6759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6759_cast_fp16 = einsum(equation = var_6759_equation_0, values = (var_6675_cast_fp16_1, var_6737_cast_fp16))[name = tensor<string, []>("op_6759_cast_fp16")];
+            tensor<string, []> var_6761_equation_0 = const()[name = tensor<string, []>("op_6761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6675_cast_fp16_2, var_6738_cast_fp16))[name = tensor<string, []>("op_6761_cast_fp16")];
+            tensor<string, []> var_6763_equation_0 = const()[name = tensor<string, []>("op_6763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6763_cast_fp16 = einsum(equation = var_6763_equation_0, values = (var_6675_cast_fp16_3, var_6739_cast_fp16))[name = tensor<string, []>("op_6763_cast_fp16")];
+            tensor<string, []> var_6765_equation_0 = const()[name = tensor<string, []>("op_6765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6675_cast_fp16_4, var_6740_cast_fp16))[name = tensor<string, []>("op_6765_cast_fp16")];
+            tensor<string, []> var_6767_equation_0 = const()[name = tensor<string, []>("op_6767_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6767_cast_fp16 = einsum(equation = var_6767_equation_0, values = (var_6675_cast_fp16_5, var_6741_cast_fp16))[name = tensor<string, []>("op_6767_cast_fp16")];
+            tensor<string, []> var_6769_equation_0 = const()[name = tensor<string, []>("op_6769_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6675_cast_fp16_6, var_6742_cast_fp16))[name = tensor<string, []>("op_6769_cast_fp16")];
+            tensor<string, []> var_6771_equation_0 = const()[name = tensor<string, []>("op_6771_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6771_cast_fp16 = einsum(equation = var_6771_equation_0, values = (var_6675_cast_fp16_7, var_6743_cast_fp16))[name = tensor<string, []>("op_6771_cast_fp16")];
+            tensor<string, []> var_6773_equation_0 = const()[name = tensor<string, []>("op_6773_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6675_cast_fp16_8, var_6744_cast_fp16))[name = tensor<string, []>("op_6773_cast_fp16")];
+            tensor<string, []> var_6775_equation_0 = const()[name = tensor<string, []>("op_6775_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6775_cast_fp16 = einsum(equation = var_6775_equation_0, values = (var_6675_cast_fp16_9, var_6745_cast_fp16))[name = tensor<string, []>("op_6775_cast_fp16")];
+            tensor<string, []> var_6777_equation_0 = const()[name = tensor<string, []>("op_6777_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6777_cast_fp16 = einsum(equation = var_6777_equation_0, values = (var_6675_cast_fp16_10, var_6746_cast_fp16))[name = tensor<string, []>("op_6777_cast_fp16")];
+            tensor<string, []> var_6779_equation_0 = const()[name = tensor<string, []>("op_6779_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6779_cast_fp16 = einsum(equation = var_6779_equation_0, values = (var_6675_cast_fp16_11, var_6747_cast_fp16))[name = tensor<string, []>("op_6779_cast_fp16")];
+            tensor<string, []> var_6781_equation_0 = const()[name = tensor<string, []>("op_6781_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6781_cast_fp16 = einsum(equation = var_6781_equation_0, values = (var_6675_cast_fp16_12, var_6748_cast_fp16))[name = tensor<string, []>("op_6781_cast_fp16")];
+            tensor<string, []> var_6783_equation_0 = const()[name = tensor<string, []>("op_6783_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6783_cast_fp16 = einsum(equation = var_6783_equation_0, values = (var_6675_cast_fp16_13, var_6749_cast_fp16))[name = tensor<string, []>("op_6783_cast_fp16")];
+            tensor<string, []> var_6785_equation_0 = const()[name = tensor<string, []>("op_6785_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6785_cast_fp16 = einsum(equation = var_6785_equation_0, values = (var_6675_cast_fp16_14, var_6750_cast_fp16))[name = tensor<string, []>("op_6785_cast_fp16")];
+            tensor<string, []> var_6787_equation_0 = const()[name = tensor<string, []>("op_6787_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6787_cast_fp16 = einsum(equation = var_6787_equation_0, values = (var_6675_cast_fp16_15, var_6751_cast_fp16))[name = tensor<string, []>("op_6787_cast_fp16")];
+            tensor<string, []> var_6789_equation_0 = const()[name = tensor<string, []>("op_6789_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6789_cast_fp16 = einsum(equation = var_6789_equation_0, values = (var_6675_cast_fp16_16, var_6752_cast_fp16))[name = tensor<string, []>("op_6789_cast_fp16")];
+            tensor<string, []> var_6791_equation_0 = const()[name = tensor<string, []>("op_6791_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6791_cast_fp16 = einsum(equation = var_6791_equation_0, values = (var_6675_cast_fp16_17, var_6753_cast_fp16))[name = tensor<string, []>("op_6791_cast_fp16")];
+            tensor<string, []> var_6793_equation_0 = const()[name = tensor<string, []>("op_6793_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6793_cast_fp16 = einsum(equation = var_6793_equation_0, values = (var_6675_cast_fp16_18, var_6754_cast_fp16))[name = tensor<string, []>("op_6793_cast_fp16")];
+            tensor<string, []> var_6795_equation_0 = const()[name = tensor<string, []>("op_6795_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6795_cast_fp16 = einsum(equation = var_6795_equation_0, values = (var_6675_cast_fp16_19, var_6755_cast_fp16))[name = tensor<string, []>("op_6795_cast_fp16")];
+            tensor<bool, []> input_245_interleave_0 = const()[name = tensor<string, []>("input_245_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_245_cast_fp16 = concat(axis = var_6580, interleave = input_245_interleave_0, values = (var_6757_cast_fp16, var_6759_cast_fp16, var_6761_cast_fp16, var_6763_cast_fp16, var_6765_cast_fp16, var_6767_cast_fp16, var_6769_cast_fp16, var_6771_cast_fp16, var_6773_cast_fp16, var_6775_cast_fp16, var_6777_cast_fp16, var_6779_cast_fp16, var_6781_cast_fp16, var_6783_cast_fp16, var_6785_cast_fp16, var_6787_cast_fp16, var_6789_cast_fp16, var_6791_cast_fp16, var_6793_cast_fp16, var_6795_cast_fp16))[name = tensor<string, []>("input_245_cast_fp16")];
+            tensor<string, []> var_6804_pad_type_0 = const()[name = tensor<string, []>("op_6804_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6804_strides_0 = const()[name = tensor<string, []>("op_6804_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6804_pad_0 = const()[name = tensor<string, []>("op_6804_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6804_dilations_0 = const()[name = tensor<string, []>("op_6804_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6804_groups_0 = const()[name = tensor<string, []>("op_6804_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968610112)))];
+            tensor<fp16, [1280]> blocks_24_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971886976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6804_cast_fp16 = conv(bias = blocks_24_attn_out_bias_to_fp16, dilations = var_6804_dilations_0, groups = var_6804_groups_0, pad = var_6804_pad_0, pad_type = var_6804_pad_type_0, strides = var_6804_strides_0, weight = blocks_24_attn_out_weight_to_fp16, x = input_245_cast_fp16)[name = tensor<string, []>("op_6804_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = var_6804_cast_fp16)[name = tensor<string, []>("inputs_99_cast_fp16")];
+            tensor<int32, [1]> input_247_axes_0 = const()[name = tensor<string, []>("input_247_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_247_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_247_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971889600)))];
+            tensor<fp16, [1280]> input_247_beta_0_to_fp16 = const()[name = tensor<string, []>("input_247_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971892224)))];
+            tensor<fp16, []> var_6814_to_fp16 = const()[name = tensor<string, []>("op_6814_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_247_cast_fp16 = layer_norm(axes = input_247_axes_0, beta = input_247_beta_0_to_fp16, epsilon = var_6814_to_fp16, gamma = input_247_gamma_0_to_fp16, x = inputs_99_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
+            tensor<string, []> input_249_pad_type_0 = const()[name = tensor<string, []>("input_249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_249_strides_0 = const()[name = tensor<string, []>("input_249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_249_pad_0 = const()[name = tensor<string, []>("input_249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_249_dilations_0 = const()[name = tensor<string, []>("input_249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_249_groups_0 = const()[name = tensor<string, []>("input_249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_24_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(971894848)))];
+            tensor<fp16, [5120]> blocks_24_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985002112)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_249_cast_fp16 = conv(bias = blocks_24_mlp_0_bias_to_fp16, dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = blocks_24_mlp_0_weight_to_fp16, x = input_247_cast_fp16)[name = tensor<string, []>("input_249_cast_fp16")];
+            tensor<string, []> input_251_mode_0 = const()[name = tensor<string, []>("input_251_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_251_cast_fp16 = gelu(mode = input_251_mode_0, x = input_249_cast_fp16)[name = tensor<string, []>("input_251_cast_fp16")];
+            tensor<string, []> var_6840_pad_type_0 = const()[name = tensor<string, []>("op_6840_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6840_strides_0 = const()[name = tensor<string, []>("op_6840_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6840_pad_0 = const()[name = tensor<string, []>("op_6840_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6840_dilations_0 = const()[name = tensor<string, []>("op_6840_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6840_groups_0 = const()[name = tensor<string, []>("op_6840_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_24_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985012416)))];
+            tensor<fp16, [1280]> blocks_24_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998119680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6840_cast_fp16 = conv(bias = blocks_24_mlp_2_bias_to_fp16, dilations = var_6840_dilations_0, groups = var_6840_groups_0, pad = var_6840_pad_0, pad_type = var_6840_pad_type_0, strides = var_6840_strides_0, weight = blocks_24_mlp_2_weight_to_fp16, x = input_251_cast_fp16)[name = tensor<string, []>("op_6840_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = var_6840_cast_fp16)[name = tensor<string, []>("inputs_101_cast_fp16")];
+            tensor<int32, []> var_6849 = const()[name = tensor<string, []>("op_6849"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_253_axes_0 = const()[name = tensor<string, []>("input_253_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_253_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_253_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998122304)))];
+            tensor<fp16, [1280]> input_253_beta_0_to_fp16 = const()[name = tensor<string, []>("input_253_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998124928)))];
+            tensor<fp16, []> var_6865_to_fp16 = const()[name = tensor<string, []>("op_6865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_253_cast_fp16 = layer_norm(axes = input_253_axes_0, beta = input_253_beta_0_to_fp16, epsilon = var_6865_to_fp16, gamma = input_253_gamma_0_to_fp16, x = inputs_101_cast_fp16)[name = tensor<string, []>("input_253_cast_fp16")];
+            tensor<string, []> q_51_pad_type_0 = const()[name = tensor<string, []>("q_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_51_strides_0 = const()[name = tensor<string, []>("q_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_51_pad_0 = const()[name = tensor<string, []>("q_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_51_dilations_0 = const()[name = tensor<string, []>("q_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_51_groups_0 = const()[name = tensor<string, []>("q_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6900_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6900_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998127552)))];
+            tensor<fp16, [1280]> var_6900_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6900_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001404416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6900_cast_fp16 = conv(bias = var_6900_bias_0_to_fp16, dilations = q_51_dilations_0, groups = q_51_groups_0, pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = q_51_strides_0, weight = var_6900_weight_0_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6900_cast_fp16")];
+            tensor<string, []> k_51_pad_type_0 = const()[name = tensor<string, []>("k_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_51_strides_0 = const()[name = tensor<string, []>("k_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_51_pad_0 = const()[name = tensor<string, []>("k_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_51_dilations_0 = const()[name = tensor<string, []>("k_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_51_groups_0 = const()[name = tensor<string, []>("k_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001407040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_51_cast_fp16 = conv(dilations = k_51_dilations_0, groups = k_51_groups_0, pad = k_51_pad_0, pad_type = k_51_pad_type_0, strides = k_51_strides_0, weight = blocks_25_attn_key_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("k_51_cast_fp16")];
+            tensor<string, []> var_6898_pad_type_0 = const()[name = tensor<string, []>("op_6898_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6898_strides_0 = const()[name = tensor<string, []>("op_6898_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6898_pad_0 = const()[name = tensor<string, []>("op_6898_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6898_dilations_0 = const()[name = tensor<string, []>("op_6898_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6898_groups_0 = const()[name = tensor<string, []>("op_6898_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1004683904)))];
+            tensor<fp16, [1280]> blocks_25_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1007960768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6898_cast_fp16 = conv(bias = blocks_25_attn_value_bias_to_fp16, dilations = var_6898_dilations_0, groups = var_6898_groups_0, pad = var_6898_pad_0, pad_type = var_6898_pad_type_0, strides = var_6898_strides_0, weight = blocks_25_attn_value_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6898_cast_fp16")];
+            tensor<int32, [20]> tile_75 = const()[name = tensor<string, []>("tile_75"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6901_axis_0 = const()[name = tensor<string, []>("op_6901_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_19 = split(axis = var_6901_axis_0, split_sizes = tile_75, x = var_6900_cast_fp16)[name = tensor<string, []>("op_6901_cast_fp16")];
+            tensor<int32, [4]> var_6922_perm_0 = const()[name = tensor<string, []>("op_6922_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_76 = const()[name = tensor<string, []>("tile_76"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6923_axis_0 = const()[name = tensor<string, []>("op_6923_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6922_cast_fp16 = transpose(perm = var_6922_perm_0, x = k_51_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_19 = split(axis = var_6923_axis_0, split_sizes = tile_76, x = var_6922_cast_fp16)[name = tensor<string, []>("op_6923_cast_fp16")];
+            tensor<int32, [20]> tile_77 = const()[name = tensor<string, []>("tile_77"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6944_axis_0 = const()[name = tensor<string, []>("op_6944_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_19 = split(axis = var_6944_axis_0, split_sizes = tile_77, x = var_6898_cast_fp16)[name = tensor<string, []>("op_6944_cast_fp16")];
+            tensor<string, []> aw_1001_equation_0 = const()[name = tensor<string, []>("aw_1001_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1001_cast_fp16 = einsum(equation = aw_1001_equation_0, values = (var_6923_cast_fp16_0, var_6901_cast_fp16_0))[name = tensor<string, []>("aw_1001_cast_fp16")];
+            tensor<string, []> aw_1003_equation_0 = const()[name = tensor<string, []>("aw_1003_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1003_cast_fp16 = einsum(equation = aw_1003_equation_0, values = (var_6923_cast_fp16_1, var_6901_cast_fp16_1))[name = tensor<string, []>("aw_1003_cast_fp16")];
+            tensor<string, []> aw_1005_equation_0 = const()[name = tensor<string, []>("aw_1005_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1005_cast_fp16 = einsum(equation = aw_1005_equation_0, values = (var_6923_cast_fp16_2, var_6901_cast_fp16_2))[name = tensor<string, []>("aw_1005_cast_fp16")];
+            tensor<string, []> aw_1007_equation_0 = const()[name = tensor<string, []>("aw_1007_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1007_cast_fp16 = einsum(equation = aw_1007_equation_0, values = (var_6923_cast_fp16_3, var_6901_cast_fp16_3))[name = tensor<string, []>("aw_1007_cast_fp16")];
+            tensor<string, []> aw_1009_equation_0 = const()[name = tensor<string, []>("aw_1009_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1009_cast_fp16 = einsum(equation = aw_1009_equation_0, values = (var_6923_cast_fp16_4, var_6901_cast_fp16_4))[name = tensor<string, []>("aw_1009_cast_fp16")];
+            tensor<string, []> aw_1011_equation_0 = const()[name = tensor<string, []>("aw_1011_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1011_cast_fp16 = einsum(equation = aw_1011_equation_0, values = (var_6923_cast_fp16_5, var_6901_cast_fp16_5))[name = tensor<string, []>("aw_1011_cast_fp16")];
+            tensor<string, []> aw_1013_equation_0 = const()[name = tensor<string, []>("aw_1013_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1013_cast_fp16 = einsum(equation = aw_1013_equation_0, values = (var_6923_cast_fp16_6, var_6901_cast_fp16_6))[name = tensor<string, []>("aw_1013_cast_fp16")];
+            tensor<string, []> aw_1015_equation_0 = const()[name = tensor<string, []>("aw_1015_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1015_cast_fp16 = einsum(equation = aw_1015_equation_0, values = (var_6923_cast_fp16_7, var_6901_cast_fp16_7))[name = tensor<string, []>("aw_1015_cast_fp16")];
+            tensor<string, []> aw_1017_equation_0 = const()[name = tensor<string, []>("aw_1017_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1017_cast_fp16 = einsum(equation = aw_1017_equation_0, values = (var_6923_cast_fp16_8, var_6901_cast_fp16_8))[name = tensor<string, []>("aw_1017_cast_fp16")];
+            tensor<string, []> aw_1019_equation_0 = const()[name = tensor<string, []>("aw_1019_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1019_cast_fp16 = einsum(equation = aw_1019_equation_0, values = (var_6923_cast_fp16_9, var_6901_cast_fp16_9))[name = tensor<string, []>("aw_1019_cast_fp16")];
+            tensor<string, []> aw_1021_equation_0 = const()[name = tensor<string, []>("aw_1021_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1021_cast_fp16 = einsum(equation = aw_1021_equation_0, values = (var_6923_cast_fp16_10, var_6901_cast_fp16_10))[name = tensor<string, []>("aw_1021_cast_fp16")];
+            tensor<string, []> aw_1023_equation_0 = const()[name = tensor<string, []>("aw_1023_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1023_cast_fp16 = einsum(equation = aw_1023_equation_0, values = (var_6923_cast_fp16_11, var_6901_cast_fp16_11))[name = tensor<string, []>("aw_1023_cast_fp16")];
+            tensor<string, []> aw_1025_equation_0 = const()[name = tensor<string, []>("aw_1025_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1025_cast_fp16 = einsum(equation = aw_1025_equation_0, values = (var_6923_cast_fp16_12, var_6901_cast_fp16_12))[name = tensor<string, []>("aw_1025_cast_fp16")];
+            tensor<string, []> aw_1027_equation_0 = const()[name = tensor<string, []>("aw_1027_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1027_cast_fp16 = einsum(equation = aw_1027_equation_0, values = (var_6923_cast_fp16_13, var_6901_cast_fp16_13))[name = tensor<string, []>("aw_1027_cast_fp16")];
+            tensor<string, []> aw_1029_equation_0 = const()[name = tensor<string, []>("aw_1029_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1029_cast_fp16 = einsum(equation = aw_1029_equation_0, values = (var_6923_cast_fp16_14, var_6901_cast_fp16_14))[name = tensor<string, []>("aw_1029_cast_fp16")];
+            tensor<string, []> aw_1031_equation_0 = const()[name = tensor<string, []>("aw_1031_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1031_cast_fp16 = einsum(equation = aw_1031_equation_0, values = (var_6923_cast_fp16_15, var_6901_cast_fp16_15))[name = tensor<string, []>("aw_1031_cast_fp16")];
+            tensor<string, []> aw_1033_equation_0 = const()[name = tensor<string, []>("aw_1033_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1033_cast_fp16 = einsum(equation = aw_1033_equation_0, values = (var_6923_cast_fp16_16, var_6901_cast_fp16_16))[name = tensor<string, []>("aw_1033_cast_fp16")];
+            tensor<string, []> aw_1035_equation_0 = const()[name = tensor<string, []>("aw_1035_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1035_cast_fp16 = einsum(equation = aw_1035_equation_0, values = (var_6923_cast_fp16_17, var_6901_cast_fp16_17))[name = tensor<string, []>("aw_1035_cast_fp16")];
+            tensor<string, []> aw_1037_equation_0 = const()[name = tensor<string, []>("aw_1037_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1037_cast_fp16 = einsum(equation = aw_1037_equation_0, values = (var_6923_cast_fp16_18, var_6901_cast_fp16_18))[name = tensor<string, []>("aw_1037_cast_fp16")];
+            tensor<string, []> aw_1039_equation_0 = const()[name = tensor<string, []>("aw_1039_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1039_cast_fp16 = einsum(equation = aw_1039_equation_0, values = (var_6923_cast_fp16_19, var_6901_cast_fp16_19))[name = tensor<string, []>("aw_1039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7005_cast_fp16 = softmax(axis = var_6849, x = aw_1001_cast_fp16)[name = tensor<string, []>("op_7005_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7006_cast_fp16 = softmax(axis = var_6849, x = aw_1003_cast_fp16)[name = tensor<string, []>("op_7006_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7007_cast_fp16 = softmax(axis = var_6849, x = aw_1005_cast_fp16)[name = tensor<string, []>("op_7007_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7008_cast_fp16 = softmax(axis = var_6849, x = aw_1007_cast_fp16)[name = tensor<string, []>("op_7008_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7009_cast_fp16 = softmax(axis = var_6849, x = aw_1009_cast_fp16)[name = tensor<string, []>("op_7009_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7010_cast_fp16 = softmax(axis = var_6849, x = aw_1011_cast_fp16)[name = tensor<string, []>("op_7010_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7011_cast_fp16 = softmax(axis = var_6849, x = aw_1013_cast_fp16)[name = tensor<string, []>("op_7011_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7012_cast_fp16 = softmax(axis = var_6849, x = aw_1015_cast_fp16)[name = tensor<string, []>("op_7012_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7013_cast_fp16 = softmax(axis = var_6849, x = aw_1017_cast_fp16)[name = tensor<string, []>("op_7013_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7014_cast_fp16 = softmax(axis = var_6849, x = aw_1019_cast_fp16)[name = tensor<string, []>("op_7014_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7015_cast_fp16 = softmax(axis = var_6849, x = aw_1021_cast_fp16)[name = tensor<string, []>("op_7015_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7016_cast_fp16 = softmax(axis = var_6849, x = aw_1023_cast_fp16)[name = tensor<string, []>("op_7016_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7017_cast_fp16 = softmax(axis = var_6849, x = aw_1025_cast_fp16)[name = tensor<string, []>("op_7017_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7018_cast_fp16 = softmax(axis = var_6849, x = aw_1027_cast_fp16)[name = tensor<string, []>("op_7018_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7019_cast_fp16 = softmax(axis = var_6849, x = aw_1029_cast_fp16)[name = tensor<string, []>("op_7019_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7020_cast_fp16 = softmax(axis = var_6849, x = aw_1031_cast_fp16)[name = tensor<string, []>("op_7020_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7021_cast_fp16 = softmax(axis = var_6849, x = aw_1033_cast_fp16)[name = tensor<string, []>("op_7021_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7022_cast_fp16 = softmax(axis = var_6849, x = aw_1035_cast_fp16)[name = tensor<string, []>("op_7022_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7023_cast_fp16 = softmax(axis = var_6849, x = aw_1037_cast_fp16)[name = tensor<string, []>("op_7023_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7024_cast_fp16 = softmax(axis = var_6849, x = aw_1039_cast_fp16)[name = tensor<string, []>("op_7024_cast_fp16")];
+            tensor<string, []> var_7026_equation_0 = const()[name = tensor<string, []>("op_7026_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7026_cast_fp16 = einsum(equation = var_7026_equation_0, values = (var_6944_cast_fp16_0, var_7005_cast_fp16))[name = tensor<string, []>("op_7026_cast_fp16")];
+            tensor<string, []> var_7028_equation_0 = const()[name = tensor<string, []>("op_7028_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7028_cast_fp16 = einsum(equation = var_7028_equation_0, values = (var_6944_cast_fp16_1, var_7006_cast_fp16))[name = tensor<string, []>("op_7028_cast_fp16")];
+            tensor<string, []> var_7030_equation_0 = const()[name = tensor<string, []>("op_7030_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7030_cast_fp16 = einsum(equation = var_7030_equation_0, values = (var_6944_cast_fp16_2, var_7007_cast_fp16))[name = tensor<string, []>("op_7030_cast_fp16")];
+            tensor<string, []> var_7032_equation_0 = const()[name = tensor<string, []>("op_7032_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7032_cast_fp16 = einsum(equation = var_7032_equation_0, values = (var_6944_cast_fp16_3, var_7008_cast_fp16))[name = tensor<string, []>("op_7032_cast_fp16")];
+            tensor<string, []> var_7034_equation_0 = const()[name = tensor<string, []>("op_7034_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7034_cast_fp16 = einsum(equation = var_7034_equation_0, values = (var_6944_cast_fp16_4, var_7009_cast_fp16))[name = tensor<string, []>("op_7034_cast_fp16")];
+            tensor<string, []> var_7036_equation_0 = const()[name = tensor<string, []>("op_7036_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7036_cast_fp16 = einsum(equation = var_7036_equation_0, values = (var_6944_cast_fp16_5, var_7010_cast_fp16))[name = tensor<string, []>("op_7036_cast_fp16")];
+            tensor<string, []> var_7038_equation_0 = const()[name = tensor<string, []>("op_7038_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7038_cast_fp16 = einsum(equation = var_7038_equation_0, values = (var_6944_cast_fp16_6, var_7011_cast_fp16))[name = tensor<string, []>("op_7038_cast_fp16")];
+            tensor<string, []> var_7040_equation_0 = const()[name = tensor<string, []>("op_7040_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7040_cast_fp16 = einsum(equation = var_7040_equation_0, values = (var_6944_cast_fp16_7, var_7012_cast_fp16))[name = tensor<string, []>("op_7040_cast_fp16")];
+            tensor<string, []> var_7042_equation_0 = const()[name = tensor<string, []>("op_7042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7042_cast_fp16 = einsum(equation = var_7042_equation_0, values = (var_6944_cast_fp16_8, var_7013_cast_fp16))[name = tensor<string, []>("op_7042_cast_fp16")];
+            tensor<string, []> var_7044_equation_0 = const()[name = tensor<string, []>("op_7044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7044_cast_fp16 = einsum(equation = var_7044_equation_0, values = (var_6944_cast_fp16_9, var_7014_cast_fp16))[name = tensor<string, []>("op_7044_cast_fp16")];
+            tensor<string, []> var_7046_equation_0 = const()[name = tensor<string, []>("op_7046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7046_cast_fp16 = einsum(equation = var_7046_equation_0, values = (var_6944_cast_fp16_10, var_7015_cast_fp16))[name = tensor<string, []>("op_7046_cast_fp16")];
+            tensor<string, []> var_7048_equation_0 = const()[name = tensor<string, []>("op_7048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7048_cast_fp16 = einsum(equation = var_7048_equation_0, values = (var_6944_cast_fp16_11, var_7016_cast_fp16))[name = tensor<string, []>("op_7048_cast_fp16")];
+            tensor<string, []> var_7050_equation_0 = const()[name = tensor<string, []>("op_7050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7050_cast_fp16 = einsum(equation = var_7050_equation_0, values = (var_6944_cast_fp16_12, var_7017_cast_fp16))[name = tensor<string, []>("op_7050_cast_fp16")];
+            tensor<string, []> var_7052_equation_0 = const()[name = tensor<string, []>("op_7052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7052_cast_fp16 = einsum(equation = var_7052_equation_0, values = (var_6944_cast_fp16_13, var_7018_cast_fp16))[name = tensor<string, []>("op_7052_cast_fp16")];
+            tensor<string, []> var_7054_equation_0 = const()[name = tensor<string, []>("op_7054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7054_cast_fp16 = einsum(equation = var_7054_equation_0, values = (var_6944_cast_fp16_14, var_7019_cast_fp16))[name = tensor<string, []>("op_7054_cast_fp16")];
+            tensor<string, []> var_7056_equation_0 = const()[name = tensor<string, []>("op_7056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7056_cast_fp16 = einsum(equation = var_7056_equation_0, values = (var_6944_cast_fp16_15, var_7020_cast_fp16))[name = tensor<string, []>("op_7056_cast_fp16")];
+            tensor<string, []> var_7058_equation_0 = const()[name = tensor<string, []>("op_7058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7058_cast_fp16 = einsum(equation = var_7058_equation_0, values = (var_6944_cast_fp16_16, var_7021_cast_fp16))[name = tensor<string, []>("op_7058_cast_fp16")];
+            tensor<string, []> var_7060_equation_0 = const()[name = tensor<string, []>("op_7060_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7060_cast_fp16 = einsum(equation = var_7060_equation_0, values = (var_6944_cast_fp16_17, var_7022_cast_fp16))[name = tensor<string, []>("op_7060_cast_fp16")];
+            tensor<string, []> var_7062_equation_0 = const()[name = tensor<string, []>("op_7062_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7062_cast_fp16 = einsum(equation = var_7062_equation_0, values = (var_6944_cast_fp16_18, var_7023_cast_fp16))[name = tensor<string, []>("op_7062_cast_fp16")];
+            tensor<string, []> var_7064_equation_0 = const()[name = tensor<string, []>("op_7064_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7064_cast_fp16 = einsum(equation = var_7064_equation_0, values = (var_6944_cast_fp16_19, var_7024_cast_fp16))[name = tensor<string, []>("op_7064_cast_fp16")];
+            tensor<bool, []> input_255_interleave_0 = const()[name = tensor<string, []>("input_255_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_255_cast_fp16 = concat(axis = var_6849, interleave = input_255_interleave_0, values = (var_7026_cast_fp16, var_7028_cast_fp16, var_7030_cast_fp16, var_7032_cast_fp16, var_7034_cast_fp16, var_7036_cast_fp16, var_7038_cast_fp16, var_7040_cast_fp16, var_7042_cast_fp16, var_7044_cast_fp16, var_7046_cast_fp16, var_7048_cast_fp16, var_7050_cast_fp16, var_7052_cast_fp16, var_7054_cast_fp16, var_7056_cast_fp16, var_7058_cast_fp16, var_7060_cast_fp16, var_7062_cast_fp16, var_7064_cast_fp16))[name = tensor<string, []>("input_255_cast_fp16")];
+            tensor<string, []> var_7073_pad_type_0 = const()[name = tensor<string, []>("op_7073_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7073_strides_0 = const()[name = tensor<string, []>("op_7073_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7073_pad_0 = const()[name = tensor<string, []>("op_7073_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7073_dilations_0 = const()[name = tensor<string, []>("op_7073_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7073_groups_0 = const()[name = tensor<string, []>("op_7073_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1007963392)))];
+            tensor<fp16, [1280]> blocks_25_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011240256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7073_cast_fp16 = conv(bias = blocks_25_attn_out_bias_to_fp16, dilations = var_7073_dilations_0, groups = var_7073_groups_0, pad = var_7073_pad_0, pad_type = var_7073_pad_type_0, strides = var_7073_strides_0, weight = blocks_25_attn_out_weight_to_fp16, x = input_255_cast_fp16)[name = tensor<string, []>("op_7073_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = var_7073_cast_fp16)[name = tensor<string, []>("inputs_103_cast_fp16")];
+            tensor<int32, [1]> input_257_axes_0 = const()[name = tensor<string, []>("input_257_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_257_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_257_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011242880)))];
+            tensor<fp16, [1280]> input_257_beta_0_to_fp16 = const()[name = tensor<string, []>("input_257_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011245504)))];
+            tensor<fp16, []> var_7083_to_fp16 = const()[name = tensor<string, []>("op_7083_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_257_cast_fp16 = layer_norm(axes = input_257_axes_0, beta = input_257_beta_0_to_fp16, epsilon = var_7083_to_fp16, gamma = input_257_gamma_0_to_fp16, x = inputs_103_cast_fp16)[name = tensor<string, []>("input_257_cast_fp16")];
+            tensor<string, []> input_259_pad_type_0 = const()[name = tensor<string, []>("input_259_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_259_strides_0 = const()[name = tensor<string, []>("input_259_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_259_pad_0 = const()[name = tensor<string, []>("input_259_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_259_dilations_0 = const()[name = tensor<string, []>("input_259_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_259_groups_0 = const()[name = tensor<string, []>("input_259_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_25_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011248128)))];
+            tensor<fp16, [5120]> blocks_25_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024355392)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_259_cast_fp16 = conv(bias = blocks_25_mlp_0_bias_to_fp16, dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = blocks_25_mlp_0_weight_to_fp16, x = input_257_cast_fp16)[name = tensor<string, []>("input_259_cast_fp16")];
+            tensor<string, []> input_261_mode_0 = const()[name = tensor<string, []>("input_261_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_261_cast_fp16 = gelu(mode = input_261_mode_0, x = input_259_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
+            tensor<string, []> var_7109_pad_type_0 = const()[name = tensor<string, []>("op_7109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7109_strides_0 = const()[name = tensor<string, []>("op_7109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7109_pad_0 = const()[name = tensor<string, []>("op_7109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7109_dilations_0 = const()[name = tensor<string, []>("op_7109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7109_groups_0 = const()[name = tensor<string, []>("op_7109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_25_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024365696)))];
+            tensor<fp16, [1280]> blocks_25_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037472960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7109_cast_fp16 = conv(bias = blocks_25_mlp_2_bias_to_fp16, dilations = var_7109_dilations_0, groups = var_7109_groups_0, pad = var_7109_pad_0, pad_type = var_7109_pad_type_0, strides = var_7109_strides_0, weight = blocks_25_mlp_2_weight_to_fp16, x = input_261_cast_fp16)[name = tensor<string, []>("op_7109_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = var_7109_cast_fp16)[name = tensor<string, []>("inputs_105_cast_fp16")];
+            tensor<int32, []> var_7118 = const()[name = tensor<string, []>("op_7118"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_263_axes_0 = const()[name = tensor<string, []>("input_263_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_263_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_263_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037475584)))];
+            tensor<fp16, [1280]> input_263_beta_0_to_fp16 = const()[name = tensor<string, []>("input_263_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037478208)))];
+            tensor<fp16, []> var_7134_to_fp16 = const()[name = tensor<string, []>("op_7134_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_263_cast_fp16 = layer_norm(axes = input_263_axes_0, beta = input_263_beta_0_to_fp16, epsilon = var_7134_to_fp16, gamma = input_263_gamma_0_to_fp16, x = inputs_105_cast_fp16)[name = tensor<string, []>("input_263_cast_fp16")];
+            tensor<string, []> q_53_pad_type_0 = const()[name = tensor<string, []>("q_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_53_strides_0 = const()[name = tensor<string, []>("q_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_53_pad_0 = const()[name = tensor<string, []>("q_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_53_dilations_0 = const()[name = tensor<string, []>("q_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_53_groups_0 = const()[name = tensor<string, []>("q_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7169_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7169_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037480832)))];
+            tensor<fp16, [1280]> var_7169_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7169_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1040757696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7169_cast_fp16 = conv(bias = var_7169_bias_0_to_fp16, dilations = q_53_dilations_0, groups = q_53_groups_0, pad = q_53_pad_0, pad_type = q_53_pad_type_0, strides = q_53_strides_0, weight = var_7169_weight_0_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7169_cast_fp16")];
+            tensor<string, []> k_53_pad_type_0 = const()[name = tensor<string, []>("k_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_53_strides_0 = const()[name = tensor<string, []>("k_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_53_pad_0 = const()[name = tensor<string, []>("k_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_53_dilations_0 = const()[name = tensor<string, []>("k_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_53_groups_0 = const()[name = tensor<string, []>("k_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1040760320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_53_cast_fp16 = conv(dilations = k_53_dilations_0, groups = k_53_groups_0, pad = k_53_pad_0, pad_type = k_53_pad_type_0, strides = k_53_strides_0, weight = blocks_26_attn_key_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("k_53_cast_fp16")];
+            tensor<string, []> var_7167_pad_type_0 = const()[name = tensor<string, []>("op_7167_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7167_strides_0 = const()[name = tensor<string, []>("op_7167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7167_pad_0 = const()[name = tensor<string, []>("op_7167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7167_dilations_0 = const()[name = tensor<string, []>("op_7167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7167_groups_0 = const()[name = tensor<string, []>("op_7167_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1044037184)))];
+            tensor<fp16, [1280]> blocks_26_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047314048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7167_cast_fp16 = conv(bias = blocks_26_attn_value_bias_to_fp16, dilations = var_7167_dilations_0, groups = var_7167_groups_0, pad = var_7167_pad_0, pad_type = var_7167_pad_type_0, strides = var_7167_strides_0, weight = blocks_26_attn_value_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7167_cast_fp16")];
+            tensor<int32, [20]> tile_78 = const()[name = tensor<string, []>("tile_78"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7170_axis_0 = const()[name = tensor<string, []>("op_7170_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_19 = split(axis = var_7170_axis_0, split_sizes = tile_78, x = var_7169_cast_fp16)[name = tensor<string, []>("op_7170_cast_fp16")];
+            tensor<int32, [4]> var_7191_perm_0 = const()[name = tensor<string, []>("op_7191_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_79 = const()[name = tensor<string, []>("tile_79"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7192_axis_0 = const()[name = tensor<string, []>("op_7192_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7191_cast_fp16 = transpose(perm = var_7191_perm_0, x = k_53_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_19 = split(axis = var_7192_axis_0, split_sizes = tile_79, x = var_7191_cast_fp16)[name = tensor<string, []>("op_7192_cast_fp16")];
+            tensor<int32, [20]> tile_80 = const()[name = tensor<string, []>("tile_80"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7213_axis_0 = const()[name = tensor<string, []>("op_7213_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_19 = split(axis = var_7213_axis_0, split_sizes = tile_80, x = var_7167_cast_fp16)[name = tensor<string, []>("op_7213_cast_fp16")];
+            tensor<string, []> aw_1041_equation_0 = const()[name = tensor<string, []>("aw_1041_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1041_cast_fp16 = einsum(equation = aw_1041_equation_0, values = (var_7192_cast_fp16_0, var_7170_cast_fp16_0))[name = tensor<string, []>("aw_1041_cast_fp16")];
+            tensor<string, []> aw_1043_equation_0 = const()[name = tensor<string, []>("aw_1043_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1043_cast_fp16 = einsum(equation = aw_1043_equation_0, values = (var_7192_cast_fp16_1, var_7170_cast_fp16_1))[name = tensor<string, []>("aw_1043_cast_fp16")];
+            tensor<string, []> aw_1045_equation_0 = const()[name = tensor<string, []>("aw_1045_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1045_cast_fp16 = einsum(equation = aw_1045_equation_0, values = (var_7192_cast_fp16_2, var_7170_cast_fp16_2))[name = tensor<string, []>("aw_1045_cast_fp16")];
+            tensor<string, []> aw_1047_equation_0 = const()[name = tensor<string, []>("aw_1047_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1047_cast_fp16 = einsum(equation = aw_1047_equation_0, values = (var_7192_cast_fp16_3, var_7170_cast_fp16_3))[name = tensor<string, []>("aw_1047_cast_fp16")];
+            tensor<string, []> aw_1049_equation_0 = const()[name = tensor<string, []>("aw_1049_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1049_cast_fp16 = einsum(equation = aw_1049_equation_0, values = (var_7192_cast_fp16_4, var_7170_cast_fp16_4))[name = tensor<string, []>("aw_1049_cast_fp16")];
+            tensor<string, []> aw_1051_equation_0 = const()[name = tensor<string, []>("aw_1051_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1051_cast_fp16 = einsum(equation = aw_1051_equation_0, values = (var_7192_cast_fp16_5, var_7170_cast_fp16_5))[name = tensor<string, []>("aw_1051_cast_fp16")];
+            tensor<string, []> aw_1053_equation_0 = const()[name = tensor<string, []>("aw_1053_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1053_cast_fp16 = einsum(equation = aw_1053_equation_0, values = (var_7192_cast_fp16_6, var_7170_cast_fp16_6))[name = tensor<string, []>("aw_1053_cast_fp16")];
+            tensor<string, []> aw_1055_equation_0 = const()[name = tensor<string, []>("aw_1055_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1055_cast_fp16 = einsum(equation = aw_1055_equation_0, values = (var_7192_cast_fp16_7, var_7170_cast_fp16_7))[name = tensor<string, []>("aw_1055_cast_fp16")];
+            tensor<string, []> aw_1057_equation_0 = const()[name = tensor<string, []>("aw_1057_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1057_cast_fp16 = einsum(equation = aw_1057_equation_0, values = (var_7192_cast_fp16_8, var_7170_cast_fp16_8))[name = tensor<string, []>("aw_1057_cast_fp16")];
+            tensor<string, []> aw_1059_equation_0 = const()[name = tensor<string, []>("aw_1059_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1059_cast_fp16 = einsum(equation = aw_1059_equation_0, values = (var_7192_cast_fp16_9, var_7170_cast_fp16_9))[name = tensor<string, []>("aw_1059_cast_fp16")];
+            tensor<string, []> aw_1061_equation_0 = const()[name = tensor<string, []>("aw_1061_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1061_cast_fp16 = einsum(equation = aw_1061_equation_0, values = (var_7192_cast_fp16_10, var_7170_cast_fp16_10))[name = tensor<string, []>("aw_1061_cast_fp16")];
+            tensor<string, []> aw_1063_equation_0 = const()[name = tensor<string, []>("aw_1063_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1063_cast_fp16 = einsum(equation = aw_1063_equation_0, values = (var_7192_cast_fp16_11, var_7170_cast_fp16_11))[name = tensor<string, []>("aw_1063_cast_fp16")];
+            tensor<string, []> aw_1065_equation_0 = const()[name = tensor<string, []>("aw_1065_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1065_cast_fp16 = einsum(equation = aw_1065_equation_0, values = (var_7192_cast_fp16_12, var_7170_cast_fp16_12))[name = tensor<string, []>("aw_1065_cast_fp16")];
+            tensor<string, []> aw_1067_equation_0 = const()[name = tensor<string, []>("aw_1067_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1067_cast_fp16 = einsum(equation = aw_1067_equation_0, values = (var_7192_cast_fp16_13, var_7170_cast_fp16_13))[name = tensor<string, []>("aw_1067_cast_fp16")];
+            tensor<string, []> aw_1069_equation_0 = const()[name = tensor<string, []>("aw_1069_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1069_cast_fp16 = einsum(equation = aw_1069_equation_0, values = (var_7192_cast_fp16_14, var_7170_cast_fp16_14))[name = tensor<string, []>("aw_1069_cast_fp16")];
+            tensor<string, []> aw_1071_equation_0 = const()[name = tensor<string, []>("aw_1071_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1071_cast_fp16 = einsum(equation = aw_1071_equation_0, values = (var_7192_cast_fp16_15, var_7170_cast_fp16_15))[name = tensor<string, []>("aw_1071_cast_fp16")];
+            tensor<string, []> aw_1073_equation_0 = const()[name = tensor<string, []>("aw_1073_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1073_cast_fp16 = einsum(equation = aw_1073_equation_0, values = (var_7192_cast_fp16_16, var_7170_cast_fp16_16))[name = tensor<string, []>("aw_1073_cast_fp16")];
+            tensor<string, []> aw_1075_equation_0 = const()[name = tensor<string, []>("aw_1075_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1075_cast_fp16 = einsum(equation = aw_1075_equation_0, values = (var_7192_cast_fp16_17, var_7170_cast_fp16_17))[name = tensor<string, []>("aw_1075_cast_fp16")];
+            tensor<string, []> aw_1077_equation_0 = const()[name = tensor<string, []>("aw_1077_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1077_cast_fp16 = einsum(equation = aw_1077_equation_0, values = (var_7192_cast_fp16_18, var_7170_cast_fp16_18))[name = tensor<string, []>("aw_1077_cast_fp16")];
+            tensor<string, []> aw_1079_equation_0 = const()[name = tensor<string, []>("aw_1079_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1079_cast_fp16 = einsum(equation = aw_1079_equation_0, values = (var_7192_cast_fp16_19, var_7170_cast_fp16_19))[name = tensor<string, []>("aw_1079_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7274_cast_fp16 = softmax(axis = var_7118, x = aw_1041_cast_fp16)[name = tensor<string, []>("op_7274_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7275_cast_fp16 = softmax(axis = var_7118, x = aw_1043_cast_fp16)[name = tensor<string, []>("op_7275_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7276_cast_fp16 = softmax(axis = var_7118, x = aw_1045_cast_fp16)[name = tensor<string, []>("op_7276_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7277_cast_fp16 = softmax(axis = var_7118, x = aw_1047_cast_fp16)[name = tensor<string, []>("op_7277_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7278_cast_fp16 = softmax(axis = var_7118, x = aw_1049_cast_fp16)[name = tensor<string, []>("op_7278_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7279_cast_fp16 = softmax(axis = var_7118, x = aw_1051_cast_fp16)[name = tensor<string, []>("op_7279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7280_cast_fp16 = softmax(axis = var_7118, x = aw_1053_cast_fp16)[name = tensor<string, []>("op_7280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7281_cast_fp16 = softmax(axis = var_7118, x = aw_1055_cast_fp16)[name = tensor<string, []>("op_7281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7282_cast_fp16 = softmax(axis = var_7118, x = aw_1057_cast_fp16)[name = tensor<string, []>("op_7282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7283_cast_fp16 = softmax(axis = var_7118, x = aw_1059_cast_fp16)[name = tensor<string, []>("op_7283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7284_cast_fp16 = softmax(axis = var_7118, x = aw_1061_cast_fp16)[name = tensor<string, []>("op_7284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7285_cast_fp16 = softmax(axis = var_7118, x = aw_1063_cast_fp16)[name = tensor<string, []>("op_7285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7286_cast_fp16 = softmax(axis = var_7118, x = aw_1065_cast_fp16)[name = tensor<string, []>("op_7286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7287_cast_fp16 = softmax(axis = var_7118, x = aw_1067_cast_fp16)[name = tensor<string, []>("op_7287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7288_cast_fp16 = softmax(axis = var_7118, x = aw_1069_cast_fp16)[name = tensor<string, []>("op_7288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7289_cast_fp16 = softmax(axis = var_7118, x = aw_1071_cast_fp16)[name = tensor<string, []>("op_7289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7290_cast_fp16 = softmax(axis = var_7118, x = aw_1073_cast_fp16)[name = tensor<string, []>("op_7290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7291_cast_fp16 = softmax(axis = var_7118, x = aw_1075_cast_fp16)[name = tensor<string, []>("op_7291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7292_cast_fp16 = softmax(axis = var_7118, x = aw_1077_cast_fp16)[name = tensor<string, []>("op_7292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7293_cast_fp16 = softmax(axis = var_7118, x = aw_1079_cast_fp16)[name = tensor<string, []>("op_7293_cast_fp16")];
+            tensor<string, []> var_7295_equation_0 = const()[name = tensor<string, []>("op_7295_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7295_cast_fp16 = einsum(equation = var_7295_equation_0, values = (var_7213_cast_fp16_0, var_7274_cast_fp16))[name = tensor<string, []>("op_7295_cast_fp16")];
+            tensor<string, []> var_7297_equation_0 = const()[name = tensor<string, []>("op_7297_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7297_cast_fp16 = einsum(equation = var_7297_equation_0, values = (var_7213_cast_fp16_1, var_7275_cast_fp16))[name = tensor<string, []>("op_7297_cast_fp16")];
+            tensor<string, []> var_7299_equation_0 = const()[name = tensor<string, []>("op_7299_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7299_cast_fp16 = einsum(equation = var_7299_equation_0, values = (var_7213_cast_fp16_2, var_7276_cast_fp16))[name = tensor<string, []>("op_7299_cast_fp16")];
+            tensor<string, []> var_7301_equation_0 = const()[name = tensor<string, []>("op_7301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7301_cast_fp16 = einsum(equation = var_7301_equation_0, values = (var_7213_cast_fp16_3, var_7277_cast_fp16))[name = tensor<string, []>("op_7301_cast_fp16")];
+            tensor<string, []> var_7303_equation_0 = const()[name = tensor<string, []>("op_7303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7303_cast_fp16 = einsum(equation = var_7303_equation_0, values = (var_7213_cast_fp16_4, var_7278_cast_fp16))[name = tensor<string, []>("op_7303_cast_fp16")];
+            tensor<string, []> var_7305_equation_0 = const()[name = tensor<string, []>("op_7305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7305_cast_fp16 = einsum(equation = var_7305_equation_0, values = (var_7213_cast_fp16_5, var_7279_cast_fp16))[name = tensor<string, []>("op_7305_cast_fp16")];
+            tensor<string, []> var_7307_equation_0 = const()[name = tensor<string, []>("op_7307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7307_cast_fp16 = einsum(equation = var_7307_equation_0, values = (var_7213_cast_fp16_6, var_7280_cast_fp16))[name = tensor<string, []>("op_7307_cast_fp16")];
+            tensor<string, []> var_7309_equation_0 = const()[name = tensor<string, []>("op_7309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7309_cast_fp16 = einsum(equation = var_7309_equation_0, values = (var_7213_cast_fp16_7, var_7281_cast_fp16))[name = tensor<string, []>("op_7309_cast_fp16")];
+            tensor<string, []> var_7311_equation_0 = const()[name = tensor<string, []>("op_7311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7311_cast_fp16 = einsum(equation = var_7311_equation_0, values = (var_7213_cast_fp16_8, var_7282_cast_fp16))[name = tensor<string, []>("op_7311_cast_fp16")];
+            tensor<string, []> var_7313_equation_0 = const()[name = tensor<string, []>("op_7313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7313_cast_fp16 = einsum(equation = var_7313_equation_0, values = (var_7213_cast_fp16_9, var_7283_cast_fp16))[name = tensor<string, []>("op_7313_cast_fp16")];
+            tensor<string, []> var_7315_equation_0 = const()[name = tensor<string, []>("op_7315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7315_cast_fp16 = einsum(equation = var_7315_equation_0, values = (var_7213_cast_fp16_10, var_7284_cast_fp16))[name = tensor<string, []>("op_7315_cast_fp16")];
+            tensor<string, []> var_7317_equation_0 = const()[name = tensor<string, []>("op_7317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7317_cast_fp16 = einsum(equation = var_7317_equation_0, values = (var_7213_cast_fp16_11, var_7285_cast_fp16))[name = tensor<string, []>("op_7317_cast_fp16")];
+            tensor<string, []> var_7319_equation_0 = const()[name = tensor<string, []>("op_7319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7319_cast_fp16 = einsum(equation = var_7319_equation_0, values = (var_7213_cast_fp16_12, var_7286_cast_fp16))[name = tensor<string, []>("op_7319_cast_fp16")];
+            tensor<string, []> var_7321_equation_0 = const()[name = tensor<string, []>("op_7321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7321_cast_fp16 = einsum(equation = var_7321_equation_0, values = (var_7213_cast_fp16_13, var_7287_cast_fp16))[name = tensor<string, []>("op_7321_cast_fp16")];
+            tensor<string, []> var_7323_equation_0 = const()[name = tensor<string, []>("op_7323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7323_cast_fp16 = einsum(equation = var_7323_equation_0, values = (var_7213_cast_fp16_14, var_7288_cast_fp16))[name = tensor<string, []>("op_7323_cast_fp16")];
+            tensor<string, []> var_7325_equation_0 = const()[name = tensor<string, []>("op_7325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7325_cast_fp16 = einsum(equation = var_7325_equation_0, values = (var_7213_cast_fp16_15, var_7289_cast_fp16))[name = tensor<string, []>("op_7325_cast_fp16")];
+            tensor<string, []> var_7327_equation_0 = const()[name = tensor<string, []>("op_7327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7327_cast_fp16 = einsum(equation = var_7327_equation_0, values = (var_7213_cast_fp16_16, var_7290_cast_fp16))[name = tensor<string, []>("op_7327_cast_fp16")];
+            tensor<string, []> var_7329_equation_0 = const()[name = tensor<string, []>("op_7329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7329_cast_fp16 = einsum(equation = var_7329_equation_0, values = (var_7213_cast_fp16_17, var_7291_cast_fp16))[name = tensor<string, []>("op_7329_cast_fp16")];
+            tensor<string, []> var_7331_equation_0 = const()[name = tensor<string, []>("op_7331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7331_cast_fp16 = einsum(equation = var_7331_equation_0, values = (var_7213_cast_fp16_18, var_7292_cast_fp16))[name = tensor<string, []>("op_7331_cast_fp16")];
+            tensor<string, []> var_7333_equation_0 = const()[name = tensor<string, []>("op_7333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7333_cast_fp16 = einsum(equation = var_7333_equation_0, values = (var_7213_cast_fp16_19, var_7293_cast_fp16))[name = tensor<string, []>("op_7333_cast_fp16")];
+            tensor<bool, []> input_265_interleave_0 = const()[name = tensor<string, []>("input_265_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_265_cast_fp16 = concat(axis = var_7118, interleave = input_265_interleave_0, values = (var_7295_cast_fp16, var_7297_cast_fp16, var_7299_cast_fp16, var_7301_cast_fp16, var_7303_cast_fp16, var_7305_cast_fp16, var_7307_cast_fp16, var_7309_cast_fp16, var_7311_cast_fp16, var_7313_cast_fp16, var_7315_cast_fp16, var_7317_cast_fp16, var_7319_cast_fp16, var_7321_cast_fp16, var_7323_cast_fp16, var_7325_cast_fp16, var_7327_cast_fp16, var_7329_cast_fp16, var_7331_cast_fp16, var_7333_cast_fp16))[name = tensor<string, []>("input_265_cast_fp16")];
+            tensor<string, []> var_7342_pad_type_0 = const()[name = tensor<string, []>("op_7342_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7342_strides_0 = const()[name = tensor<string, []>("op_7342_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7342_pad_0 = const()[name = tensor<string, []>("op_7342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7342_dilations_0 = const()[name = tensor<string, []>("op_7342_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7342_groups_0 = const()[name = tensor<string, []>("op_7342_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047316672)))];
+            tensor<fp16, [1280]> blocks_26_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050593536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7342_cast_fp16 = conv(bias = blocks_26_attn_out_bias_to_fp16, dilations = var_7342_dilations_0, groups = var_7342_groups_0, pad = var_7342_pad_0, pad_type = var_7342_pad_type_0, strides = var_7342_strides_0, weight = blocks_26_attn_out_weight_to_fp16, x = input_265_cast_fp16)[name = tensor<string, []>("op_7342_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = var_7342_cast_fp16)[name = tensor<string, []>("inputs_107_cast_fp16")];
+            tensor<int32, [1]> input_267_axes_0 = const()[name = tensor<string, []>("input_267_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_267_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_267_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050596160)))];
+            tensor<fp16, [1280]> input_267_beta_0_to_fp16 = const()[name = tensor<string, []>("input_267_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050598784)))];
+            tensor<fp16, []> var_7352_to_fp16 = const()[name = tensor<string, []>("op_7352_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_267_cast_fp16 = layer_norm(axes = input_267_axes_0, beta = input_267_beta_0_to_fp16, epsilon = var_7352_to_fp16, gamma = input_267_gamma_0_to_fp16, x = inputs_107_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
+            tensor<string, []> input_269_pad_type_0 = const()[name = tensor<string, []>("input_269_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_269_strides_0 = const()[name = tensor<string, []>("input_269_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_269_pad_0 = const()[name = tensor<string, []>("input_269_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_269_dilations_0 = const()[name = tensor<string, []>("input_269_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_269_groups_0 = const()[name = tensor<string, []>("input_269_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_26_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050601408)))];
+            tensor<fp16, [5120]> blocks_26_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1063708672)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_269_cast_fp16 = conv(bias = blocks_26_mlp_0_bias_to_fp16, dilations = input_269_dilations_0, groups = input_269_groups_0, pad = input_269_pad_0, pad_type = input_269_pad_type_0, strides = input_269_strides_0, weight = blocks_26_mlp_0_weight_to_fp16, x = input_267_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
+            tensor<string, []> input_271_mode_0 = const()[name = tensor<string, []>("input_271_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_271_cast_fp16 = gelu(mode = input_271_mode_0, x = input_269_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
+            tensor<string, []> var_7378_pad_type_0 = const()[name = tensor<string, []>("op_7378_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7378_strides_0 = const()[name = tensor<string, []>("op_7378_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7378_pad_0 = const()[name = tensor<string, []>("op_7378_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7378_dilations_0 = const()[name = tensor<string, []>("op_7378_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7378_groups_0 = const()[name = tensor<string, []>("op_7378_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_26_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1063718976)))];
+            tensor<fp16, [1280]> blocks_26_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076826240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7378_cast_fp16 = conv(bias = blocks_26_mlp_2_bias_to_fp16, dilations = var_7378_dilations_0, groups = var_7378_groups_0, pad = var_7378_pad_0, pad_type = var_7378_pad_type_0, strides = var_7378_strides_0, weight = blocks_26_mlp_2_weight_to_fp16, x = input_271_cast_fp16)[name = tensor<string, []>("op_7378_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = var_7378_cast_fp16)[name = tensor<string, []>("inputs_109_cast_fp16")];
+            tensor<int32, []> var_7387 = const()[name = tensor<string, []>("op_7387"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_273_axes_0 = const()[name = tensor<string, []>("input_273_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_273_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_273_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076828864)))];
+            tensor<fp16, [1280]> input_273_beta_0_to_fp16 = const()[name = tensor<string, []>("input_273_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076831488)))];
+            tensor<fp16, []> var_7403_to_fp16 = const()[name = tensor<string, []>("op_7403_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_273_cast_fp16 = layer_norm(axes = input_273_axes_0, beta = input_273_beta_0_to_fp16, epsilon = var_7403_to_fp16, gamma = input_273_gamma_0_to_fp16, x = inputs_109_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
+            tensor<string, []> q_55_pad_type_0 = const()[name = tensor<string, []>("q_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_55_strides_0 = const()[name = tensor<string, []>("q_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_55_pad_0 = const()[name = tensor<string, []>("q_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_55_dilations_0 = const()[name = tensor<string, []>("q_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_55_groups_0 = const()[name = tensor<string, []>("q_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7438_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7438_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1076834112)))];
+            tensor<fp16, [1280]> var_7438_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7438_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080110976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7438_cast_fp16 = conv(bias = var_7438_bias_0_to_fp16, dilations = q_55_dilations_0, groups = q_55_groups_0, pad = q_55_pad_0, pad_type = q_55_pad_type_0, strides = q_55_strides_0, weight = var_7438_weight_0_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7438_cast_fp16")];
+            tensor<string, []> k_55_pad_type_0 = const()[name = tensor<string, []>("k_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_55_strides_0 = const()[name = tensor<string, []>("k_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_55_pad_0 = const()[name = tensor<string, []>("k_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_55_dilations_0 = const()[name = tensor<string, []>("k_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_55_groups_0 = const()[name = tensor<string, []>("k_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080113600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_55_cast_fp16 = conv(dilations = k_55_dilations_0, groups = k_55_groups_0, pad = k_55_pad_0, pad_type = k_55_pad_type_0, strides = k_55_strides_0, weight = blocks_27_attn_key_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("k_55_cast_fp16")];
+            tensor<string, []> var_7436_pad_type_0 = const()[name = tensor<string, []>("op_7436_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7436_strides_0 = const()[name = tensor<string, []>("op_7436_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7436_pad_0 = const()[name = tensor<string, []>("op_7436_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7436_dilations_0 = const()[name = tensor<string, []>("op_7436_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7436_groups_0 = const()[name = tensor<string, []>("op_7436_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1083390464)))];
+            tensor<fp16, [1280]> blocks_27_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1086667328)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7436_cast_fp16 = conv(bias = blocks_27_attn_value_bias_to_fp16, dilations = var_7436_dilations_0, groups = var_7436_groups_0, pad = var_7436_pad_0, pad_type = var_7436_pad_type_0, strides = var_7436_strides_0, weight = blocks_27_attn_value_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7436_cast_fp16")];
+            tensor<int32, [20]> tile_81 = const()[name = tensor<string, []>("tile_81"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7439_axis_0 = const()[name = tensor<string, []>("op_7439_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_19 = split(axis = var_7439_axis_0, split_sizes = tile_81, x = var_7438_cast_fp16)[name = tensor<string, []>("op_7439_cast_fp16")];
+            tensor<int32, [4]> var_7460_perm_0 = const()[name = tensor<string, []>("op_7460_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_82 = const()[name = tensor<string, []>("tile_82"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7461_axis_0 = const()[name = tensor<string, []>("op_7461_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7460_cast_fp16 = transpose(perm = var_7460_perm_0, x = k_55_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_19 = split(axis = var_7461_axis_0, split_sizes = tile_82, x = var_7460_cast_fp16)[name = tensor<string, []>("op_7461_cast_fp16")];
+            tensor<int32, [20]> tile_83 = const()[name = tensor<string, []>("tile_83"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7482_axis_0 = const()[name = tensor<string, []>("op_7482_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_19 = split(axis = var_7482_axis_0, split_sizes = tile_83, x = var_7436_cast_fp16)[name = tensor<string, []>("op_7482_cast_fp16")];
+            tensor<string, []> aw_1081_equation_0 = const()[name = tensor<string, []>("aw_1081_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1081_cast_fp16 = einsum(equation = aw_1081_equation_0, values = (var_7461_cast_fp16_0, var_7439_cast_fp16_0))[name = tensor<string, []>("aw_1081_cast_fp16")];
+            tensor<string, []> aw_1083_equation_0 = const()[name = tensor<string, []>("aw_1083_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1083_cast_fp16 = einsum(equation = aw_1083_equation_0, values = (var_7461_cast_fp16_1, var_7439_cast_fp16_1))[name = tensor<string, []>("aw_1083_cast_fp16")];
+            tensor<string, []> aw_1085_equation_0 = const()[name = tensor<string, []>("aw_1085_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1085_cast_fp16 = einsum(equation = aw_1085_equation_0, values = (var_7461_cast_fp16_2, var_7439_cast_fp16_2))[name = tensor<string, []>("aw_1085_cast_fp16")];
+            tensor<string, []> aw_1087_equation_0 = const()[name = tensor<string, []>("aw_1087_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1087_cast_fp16 = einsum(equation = aw_1087_equation_0, values = (var_7461_cast_fp16_3, var_7439_cast_fp16_3))[name = tensor<string, []>("aw_1087_cast_fp16")];
+            tensor<string, []> aw_1089_equation_0 = const()[name = tensor<string, []>("aw_1089_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1089_cast_fp16 = einsum(equation = aw_1089_equation_0, values = (var_7461_cast_fp16_4, var_7439_cast_fp16_4))[name = tensor<string, []>("aw_1089_cast_fp16")];
+            tensor<string, []> aw_1091_equation_0 = const()[name = tensor<string, []>("aw_1091_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1091_cast_fp16 = einsum(equation = aw_1091_equation_0, values = (var_7461_cast_fp16_5, var_7439_cast_fp16_5))[name = tensor<string, []>("aw_1091_cast_fp16")];
+            tensor<string, []> aw_1093_equation_0 = const()[name = tensor<string, []>("aw_1093_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1093_cast_fp16 = einsum(equation = aw_1093_equation_0, values = (var_7461_cast_fp16_6, var_7439_cast_fp16_6))[name = tensor<string, []>("aw_1093_cast_fp16")];
+            tensor<string, []> aw_1095_equation_0 = const()[name = tensor<string, []>("aw_1095_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1095_cast_fp16 = einsum(equation = aw_1095_equation_0, values = (var_7461_cast_fp16_7, var_7439_cast_fp16_7))[name = tensor<string, []>("aw_1095_cast_fp16")];
+            tensor<string, []> aw_1097_equation_0 = const()[name = tensor<string, []>("aw_1097_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1097_cast_fp16 = einsum(equation = aw_1097_equation_0, values = (var_7461_cast_fp16_8, var_7439_cast_fp16_8))[name = tensor<string, []>("aw_1097_cast_fp16")];
+            tensor<string, []> aw_1099_equation_0 = const()[name = tensor<string, []>("aw_1099_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1099_cast_fp16 = einsum(equation = aw_1099_equation_0, values = (var_7461_cast_fp16_9, var_7439_cast_fp16_9))[name = tensor<string, []>("aw_1099_cast_fp16")];
+            tensor<string, []> aw_1101_equation_0 = const()[name = tensor<string, []>("aw_1101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1101_cast_fp16 = einsum(equation = aw_1101_equation_0, values = (var_7461_cast_fp16_10, var_7439_cast_fp16_10))[name = tensor<string, []>("aw_1101_cast_fp16")];
+            tensor<string, []> aw_1103_equation_0 = const()[name = tensor<string, []>("aw_1103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1103_cast_fp16 = einsum(equation = aw_1103_equation_0, values = (var_7461_cast_fp16_11, var_7439_cast_fp16_11))[name = tensor<string, []>("aw_1103_cast_fp16")];
+            tensor<string, []> aw_1105_equation_0 = const()[name = tensor<string, []>("aw_1105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1105_cast_fp16 = einsum(equation = aw_1105_equation_0, values = (var_7461_cast_fp16_12, var_7439_cast_fp16_12))[name = tensor<string, []>("aw_1105_cast_fp16")];
+            tensor<string, []> aw_1107_equation_0 = const()[name = tensor<string, []>("aw_1107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1107_cast_fp16 = einsum(equation = aw_1107_equation_0, values = (var_7461_cast_fp16_13, var_7439_cast_fp16_13))[name = tensor<string, []>("aw_1107_cast_fp16")];
+            tensor<string, []> aw_1109_equation_0 = const()[name = tensor<string, []>("aw_1109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1109_cast_fp16 = einsum(equation = aw_1109_equation_0, values = (var_7461_cast_fp16_14, var_7439_cast_fp16_14))[name = tensor<string, []>("aw_1109_cast_fp16")];
+            tensor<string, []> aw_1111_equation_0 = const()[name = tensor<string, []>("aw_1111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1111_cast_fp16 = einsum(equation = aw_1111_equation_0, values = (var_7461_cast_fp16_15, var_7439_cast_fp16_15))[name = tensor<string, []>("aw_1111_cast_fp16")];
+            tensor<string, []> aw_1113_equation_0 = const()[name = tensor<string, []>("aw_1113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1113_cast_fp16 = einsum(equation = aw_1113_equation_0, values = (var_7461_cast_fp16_16, var_7439_cast_fp16_16))[name = tensor<string, []>("aw_1113_cast_fp16")];
+            tensor<string, []> aw_1115_equation_0 = const()[name = tensor<string, []>("aw_1115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1115_cast_fp16 = einsum(equation = aw_1115_equation_0, values = (var_7461_cast_fp16_17, var_7439_cast_fp16_17))[name = tensor<string, []>("aw_1115_cast_fp16")];
+            tensor<string, []> aw_1117_equation_0 = const()[name = tensor<string, []>("aw_1117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1117_cast_fp16 = einsum(equation = aw_1117_equation_0, values = (var_7461_cast_fp16_18, var_7439_cast_fp16_18))[name = tensor<string, []>("aw_1117_cast_fp16")];
+            tensor<string, []> aw_1119_equation_0 = const()[name = tensor<string, []>("aw_1119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1119_cast_fp16 = einsum(equation = aw_1119_equation_0, values = (var_7461_cast_fp16_19, var_7439_cast_fp16_19))[name = tensor<string, []>("aw_1119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7543_cast_fp16 = softmax(axis = var_7387, x = aw_1081_cast_fp16)[name = tensor<string, []>("op_7543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7544_cast_fp16 = softmax(axis = var_7387, x = aw_1083_cast_fp16)[name = tensor<string, []>("op_7544_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7545_cast_fp16 = softmax(axis = var_7387, x = aw_1085_cast_fp16)[name = tensor<string, []>("op_7545_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7546_cast_fp16 = softmax(axis = var_7387, x = aw_1087_cast_fp16)[name = tensor<string, []>("op_7546_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7547_cast_fp16 = softmax(axis = var_7387, x = aw_1089_cast_fp16)[name = tensor<string, []>("op_7547_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7548_cast_fp16 = softmax(axis = var_7387, x = aw_1091_cast_fp16)[name = tensor<string, []>("op_7548_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7549_cast_fp16 = softmax(axis = var_7387, x = aw_1093_cast_fp16)[name = tensor<string, []>("op_7549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7550_cast_fp16 = softmax(axis = var_7387, x = aw_1095_cast_fp16)[name = tensor<string, []>("op_7550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7551_cast_fp16 = softmax(axis = var_7387, x = aw_1097_cast_fp16)[name = tensor<string, []>("op_7551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7552_cast_fp16 = softmax(axis = var_7387, x = aw_1099_cast_fp16)[name = tensor<string, []>("op_7552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7553_cast_fp16 = softmax(axis = var_7387, x = aw_1101_cast_fp16)[name = tensor<string, []>("op_7553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7554_cast_fp16 = softmax(axis = var_7387, x = aw_1103_cast_fp16)[name = tensor<string, []>("op_7554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7555_cast_fp16 = softmax(axis = var_7387, x = aw_1105_cast_fp16)[name = tensor<string, []>("op_7555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7556_cast_fp16 = softmax(axis = var_7387, x = aw_1107_cast_fp16)[name = tensor<string, []>("op_7556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7557_cast_fp16 = softmax(axis = var_7387, x = aw_1109_cast_fp16)[name = tensor<string, []>("op_7557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7558_cast_fp16 = softmax(axis = var_7387, x = aw_1111_cast_fp16)[name = tensor<string, []>("op_7558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7559_cast_fp16 = softmax(axis = var_7387, x = aw_1113_cast_fp16)[name = tensor<string, []>("op_7559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7560_cast_fp16 = softmax(axis = var_7387, x = aw_1115_cast_fp16)[name = tensor<string, []>("op_7560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7561_cast_fp16 = softmax(axis = var_7387, x = aw_1117_cast_fp16)[name = tensor<string, []>("op_7561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7562_cast_fp16 = softmax(axis = var_7387, x = aw_1119_cast_fp16)[name = tensor<string, []>("op_7562_cast_fp16")];
+            tensor<string, []> var_7564_equation_0 = const()[name = tensor<string, []>("op_7564_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7564_cast_fp16 = einsum(equation = var_7564_equation_0, values = (var_7482_cast_fp16_0, var_7543_cast_fp16))[name = tensor<string, []>("op_7564_cast_fp16")];
+            tensor<string, []> var_7566_equation_0 = const()[name = tensor<string, []>("op_7566_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7566_cast_fp16 = einsum(equation = var_7566_equation_0, values = (var_7482_cast_fp16_1, var_7544_cast_fp16))[name = tensor<string, []>("op_7566_cast_fp16")];
+            tensor<string, []> var_7568_equation_0 = const()[name = tensor<string, []>("op_7568_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7568_cast_fp16 = einsum(equation = var_7568_equation_0, values = (var_7482_cast_fp16_2, var_7545_cast_fp16))[name = tensor<string, []>("op_7568_cast_fp16")];
+            tensor<string, []> var_7570_equation_0 = const()[name = tensor<string, []>("op_7570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7570_cast_fp16 = einsum(equation = var_7570_equation_0, values = (var_7482_cast_fp16_3, var_7546_cast_fp16))[name = tensor<string, []>("op_7570_cast_fp16")];
+            tensor<string, []> var_7572_equation_0 = const()[name = tensor<string, []>("op_7572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7572_cast_fp16 = einsum(equation = var_7572_equation_0, values = (var_7482_cast_fp16_4, var_7547_cast_fp16))[name = tensor<string, []>("op_7572_cast_fp16")];
+            tensor<string, []> var_7574_equation_0 = const()[name = tensor<string, []>("op_7574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7574_cast_fp16 = einsum(equation = var_7574_equation_0, values = (var_7482_cast_fp16_5, var_7548_cast_fp16))[name = tensor<string, []>("op_7574_cast_fp16")];
+            tensor<string, []> var_7576_equation_0 = const()[name = tensor<string, []>("op_7576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7576_cast_fp16 = einsum(equation = var_7576_equation_0, values = (var_7482_cast_fp16_6, var_7549_cast_fp16))[name = tensor<string, []>("op_7576_cast_fp16")];
+            tensor<string, []> var_7578_equation_0 = const()[name = tensor<string, []>("op_7578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7578_cast_fp16 = einsum(equation = var_7578_equation_0, values = (var_7482_cast_fp16_7, var_7550_cast_fp16))[name = tensor<string, []>("op_7578_cast_fp16")];
+            tensor<string, []> var_7580_equation_0 = const()[name = tensor<string, []>("op_7580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7580_cast_fp16 = einsum(equation = var_7580_equation_0, values = (var_7482_cast_fp16_8, var_7551_cast_fp16))[name = tensor<string, []>("op_7580_cast_fp16")];
+            tensor<string, []> var_7582_equation_0 = const()[name = tensor<string, []>("op_7582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7582_cast_fp16 = einsum(equation = var_7582_equation_0, values = (var_7482_cast_fp16_9, var_7552_cast_fp16))[name = tensor<string, []>("op_7582_cast_fp16")];
+            tensor<string, []> var_7584_equation_0 = const()[name = tensor<string, []>("op_7584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7584_cast_fp16 = einsum(equation = var_7584_equation_0, values = (var_7482_cast_fp16_10, var_7553_cast_fp16))[name = tensor<string, []>("op_7584_cast_fp16")];
+            tensor<string, []> var_7586_equation_0 = const()[name = tensor<string, []>("op_7586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7586_cast_fp16 = einsum(equation = var_7586_equation_0, values = (var_7482_cast_fp16_11, var_7554_cast_fp16))[name = tensor<string, []>("op_7586_cast_fp16")];
+            tensor<string, []> var_7588_equation_0 = const()[name = tensor<string, []>("op_7588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7588_cast_fp16 = einsum(equation = var_7588_equation_0, values = (var_7482_cast_fp16_12, var_7555_cast_fp16))[name = tensor<string, []>("op_7588_cast_fp16")];
+            tensor<string, []> var_7590_equation_0 = const()[name = tensor<string, []>("op_7590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7590_cast_fp16 = einsum(equation = var_7590_equation_0, values = (var_7482_cast_fp16_13, var_7556_cast_fp16))[name = tensor<string, []>("op_7590_cast_fp16")];
+            tensor<string, []> var_7592_equation_0 = const()[name = tensor<string, []>("op_7592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7592_cast_fp16 = einsum(equation = var_7592_equation_0, values = (var_7482_cast_fp16_14, var_7557_cast_fp16))[name = tensor<string, []>("op_7592_cast_fp16")];
+            tensor<string, []> var_7594_equation_0 = const()[name = tensor<string, []>("op_7594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7594_cast_fp16 = einsum(equation = var_7594_equation_0, values = (var_7482_cast_fp16_15, var_7558_cast_fp16))[name = tensor<string, []>("op_7594_cast_fp16")];
+            tensor<string, []> var_7596_equation_0 = const()[name = tensor<string, []>("op_7596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7596_cast_fp16 = einsum(equation = var_7596_equation_0, values = (var_7482_cast_fp16_16, var_7559_cast_fp16))[name = tensor<string, []>("op_7596_cast_fp16")];
+            tensor<string, []> var_7598_equation_0 = const()[name = tensor<string, []>("op_7598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7598_cast_fp16 = einsum(equation = var_7598_equation_0, values = (var_7482_cast_fp16_17, var_7560_cast_fp16))[name = tensor<string, []>("op_7598_cast_fp16")];
+            tensor<string, []> var_7600_equation_0 = const()[name = tensor<string, []>("op_7600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7600_cast_fp16 = einsum(equation = var_7600_equation_0, values = (var_7482_cast_fp16_18, var_7561_cast_fp16))[name = tensor<string, []>("op_7600_cast_fp16")];
+            tensor<string, []> var_7602_equation_0 = const()[name = tensor<string, []>("op_7602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7602_cast_fp16 = einsum(equation = var_7602_equation_0, values = (var_7482_cast_fp16_19, var_7562_cast_fp16))[name = tensor<string, []>("op_7602_cast_fp16")];
+            tensor<bool, []> input_275_interleave_0 = const()[name = tensor<string, []>("input_275_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_275_cast_fp16 = concat(axis = var_7387, interleave = input_275_interleave_0, values = (var_7564_cast_fp16, var_7566_cast_fp16, var_7568_cast_fp16, var_7570_cast_fp16, var_7572_cast_fp16, var_7574_cast_fp16, var_7576_cast_fp16, var_7578_cast_fp16, var_7580_cast_fp16, var_7582_cast_fp16, var_7584_cast_fp16, var_7586_cast_fp16, var_7588_cast_fp16, var_7590_cast_fp16, var_7592_cast_fp16, var_7594_cast_fp16, var_7596_cast_fp16, var_7598_cast_fp16, var_7600_cast_fp16, var_7602_cast_fp16))[name = tensor<string, []>("input_275_cast_fp16")];
+            tensor<string, []> var_7611_pad_type_0 = const()[name = tensor<string, []>("op_7611_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7611_strides_0 = const()[name = tensor<string, []>("op_7611_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7611_pad_0 = const()[name = tensor<string, []>("op_7611_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7611_dilations_0 = const()[name = tensor<string, []>("op_7611_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7611_groups_0 = const()[name = tensor<string, []>("op_7611_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1086669952)))];
+            tensor<fp16, [1280]> blocks_27_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089946816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7611_cast_fp16 = conv(bias = blocks_27_attn_out_bias_to_fp16, dilations = var_7611_dilations_0, groups = var_7611_groups_0, pad = var_7611_pad_0, pad_type = var_7611_pad_type_0, strides = var_7611_strides_0, weight = blocks_27_attn_out_weight_to_fp16, x = input_275_cast_fp16)[name = tensor<string, []>("op_7611_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = var_7611_cast_fp16)[name = tensor<string, []>("inputs_111_cast_fp16")];
+            tensor<int32, [1]> input_277_axes_0 = const()[name = tensor<string, []>("input_277_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_277_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_277_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089949440)))];
+            tensor<fp16, [1280]> input_277_beta_0_to_fp16 = const()[name = tensor<string, []>("input_277_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089952064)))];
+            tensor<fp16, []> var_7621_to_fp16 = const()[name = tensor<string, []>("op_7621_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_277_cast_fp16 = layer_norm(axes = input_277_axes_0, beta = input_277_beta_0_to_fp16, epsilon = var_7621_to_fp16, gamma = input_277_gamma_0_to_fp16, x = inputs_111_cast_fp16)[name = tensor<string, []>("input_277_cast_fp16")];
+            tensor<string, []> input_279_pad_type_0 = const()[name = tensor<string, []>("input_279_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_279_strides_0 = const()[name = tensor<string, []>("input_279_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_279_pad_0 = const()[name = tensor<string, []>("input_279_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_279_dilations_0 = const()[name = tensor<string, []>("input_279_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_279_groups_0 = const()[name = tensor<string, []>("input_279_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_27_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1089954688)))];
+            tensor<fp16, [5120]> blocks_27_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103061952)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_279_cast_fp16 = conv(bias = blocks_27_mlp_0_bias_to_fp16, dilations = input_279_dilations_0, groups = input_279_groups_0, pad = input_279_pad_0, pad_type = input_279_pad_type_0, strides = input_279_strides_0, weight = blocks_27_mlp_0_weight_to_fp16, x = input_277_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
+            tensor<string, []> input_281_mode_0 = const()[name = tensor<string, []>("input_281_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_281_cast_fp16 = gelu(mode = input_281_mode_0, x = input_279_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
+            tensor<string, []> var_7647_pad_type_0 = const()[name = tensor<string, []>("op_7647_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7647_strides_0 = const()[name = tensor<string, []>("op_7647_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7647_pad_0 = const()[name = tensor<string, []>("op_7647_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7647_dilations_0 = const()[name = tensor<string, []>("op_7647_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7647_groups_0 = const()[name = tensor<string, []>("op_7647_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_27_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103072256)))];
+            tensor<fp16, [1280]> blocks_27_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116179520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7647_cast_fp16 = conv(bias = blocks_27_mlp_2_bias_to_fp16, dilations = var_7647_dilations_0, groups = var_7647_groups_0, pad = var_7647_pad_0, pad_type = var_7647_pad_type_0, strides = var_7647_strides_0, weight = blocks_27_mlp_2_weight_to_fp16, x = input_281_cast_fp16)[name = tensor<string, []>("op_7647_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = var_7647_cast_fp16)[name = tensor<string, []>("inputs_113_cast_fp16")];
+            tensor<int32, []> var_7656 = const()[name = tensor<string, []>("op_7656"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_283_axes_0 = const()[name = tensor<string, []>("input_283_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_283_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_283_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116182144)))];
+            tensor<fp16, [1280]> input_283_beta_0_to_fp16 = const()[name = tensor<string, []>("input_283_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116184768)))];
+            tensor<fp16, []> var_7672_to_fp16 = const()[name = tensor<string, []>("op_7672_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_283_cast_fp16 = layer_norm(axes = input_283_axes_0, beta = input_283_beta_0_to_fp16, epsilon = var_7672_to_fp16, gamma = input_283_gamma_0_to_fp16, x = inputs_113_cast_fp16)[name = tensor<string, []>("input_283_cast_fp16")];
+            tensor<string, []> q_57_pad_type_0 = const()[name = tensor<string, []>("q_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_57_strides_0 = const()[name = tensor<string, []>("q_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_57_pad_0 = const()[name = tensor<string, []>("q_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_57_dilations_0 = const()[name = tensor<string, []>("q_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_57_groups_0 = const()[name = tensor<string, []>("q_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7707_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7707_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116187392)))];
+            tensor<fp16, [1280]> var_7707_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7707_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119464256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7707_cast_fp16 = conv(bias = var_7707_bias_0_to_fp16, dilations = q_57_dilations_0, groups = q_57_groups_0, pad = q_57_pad_0, pad_type = q_57_pad_type_0, strides = q_57_strides_0, weight = var_7707_weight_0_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7707_cast_fp16")];
+            tensor<string, []> k_57_pad_type_0 = const()[name = tensor<string, []>("k_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_57_strides_0 = const()[name = tensor<string, []>("k_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_57_pad_0 = const()[name = tensor<string, []>("k_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_57_dilations_0 = const()[name = tensor<string, []>("k_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_57_groups_0 = const()[name = tensor<string, []>("k_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119466880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_57_cast_fp16 = conv(dilations = k_57_dilations_0, groups = k_57_groups_0, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = k_57_strides_0, weight = blocks_28_attn_key_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("k_57_cast_fp16")];
+            tensor<string, []> var_7705_pad_type_0 = const()[name = tensor<string, []>("op_7705_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7705_strides_0 = const()[name = tensor<string, []>("op_7705_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7705_pad_0 = const()[name = tensor<string, []>("op_7705_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7705_dilations_0 = const()[name = tensor<string, []>("op_7705_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7705_groups_0 = const()[name = tensor<string, []>("op_7705_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1122743744)))];
+            tensor<fp16, [1280]> blocks_28_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126020608)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7705_cast_fp16 = conv(bias = blocks_28_attn_value_bias_to_fp16, dilations = var_7705_dilations_0, groups = var_7705_groups_0, pad = var_7705_pad_0, pad_type = var_7705_pad_type_0, strides = var_7705_strides_0, weight = blocks_28_attn_value_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7705_cast_fp16")];
+            tensor<int32, [20]> tile_84 = const()[name = tensor<string, []>("tile_84"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7708_axis_0 = const()[name = tensor<string, []>("op_7708_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_19 = split(axis = var_7708_axis_0, split_sizes = tile_84, x = var_7707_cast_fp16)[name = tensor<string, []>("op_7708_cast_fp16")];
+            tensor<int32, [4]> var_7729_perm_0 = const()[name = tensor<string, []>("op_7729_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_85 = const()[name = tensor<string, []>("tile_85"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7730_axis_0 = const()[name = tensor<string, []>("op_7730_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7729_cast_fp16 = transpose(perm = var_7729_perm_0, x = k_57_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_19 = split(axis = var_7730_axis_0, split_sizes = tile_85, x = var_7729_cast_fp16)[name = tensor<string, []>("op_7730_cast_fp16")];
+            tensor<int32, [20]> tile_86 = const()[name = tensor<string, []>("tile_86"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7751_axis_0 = const()[name = tensor<string, []>("op_7751_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_19 = split(axis = var_7751_axis_0, split_sizes = tile_86, x = var_7705_cast_fp16)[name = tensor<string, []>("op_7751_cast_fp16")];
+            tensor<string, []> aw_1121_equation_0 = const()[name = tensor<string, []>("aw_1121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1121_cast_fp16 = einsum(equation = aw_1121_equation_0, values = (var_7730_cast_fp16_0, var_7708_cast_fp16_0))[name = tensor<string, []>("aw_1121_cast_fp16")];
+            tensor<string, []> aw_1123_equation_0 = const()[name = tensor<string, []>("aw_1123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1123_cast_fp16 = einsum(equation = aw_1123_equation_0, values = (var_7730_cast_fp16_1, var_7708_cast_fp16_1))[name = tensor<string, []>("aw_1123_cast_fp16")];
+            tensor<string, []> aw_1125_equation_0 = const()[name = tensor<string, []>("aw_1125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1125_cast_fp16 = einsum(equation = aw_1125_equation_0, values = (var_7730_cast_fp16_2, var_7708_cast_fp16_2))[name = tensor<string, []>("aw_1125_cast_fp16")];
+            tensor<string, []> aw_1127_equation_0 = const()[name = tensor<string, []>("aw_1127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1127_cast_fp16 = einsum(equation = aw_1127_equation_0, values = (var_7730_cast_fp16_3, var_7708_cast_fp16_3))[name = tensor<string, []>("aw_1127_cast_fp16")];
+            tensor<string, []> aw_1129_equation_0 = const()[name = tensor<string, []>("aw_1129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1129_cast_fp16 = einsum(equation = aw_1129_equation_0, values = (var_7730_cast_fp16_4, var_7708_cast_fp16_4))[name = tensor<string, []>("aw_1129_cast_fp16")];
+            tensor<string, []> aw_1131_equation_0 = const()[name = tensor<string, []>("aw_1131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1131_cast_fp16 = einsum(equation = aw_1131_equation_0, values = (var_7730_cast_fp16_5, var_7708_cast_fp16_5))[name = tensor<string, []>("aw_1131_cast_fp16")];
+            tensor<string, []> aw_1133_equation_0 = const()[name = tensor<string, []>("aw_1133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1133_cast_fp16 = einsum(equation = aw_1133_equation_0, values = (var_7730_cast_fp16_6, var_7708_cast_fp16_6))[name = tensor<string, []>("aw_1133_cast_fp16")];
+            tensor<string, []> aw_1135_equation_0 = const()[name = tensor<string, []>("aw_1135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1135_cast_fp16 = einsum(equation = aw_1135_equation_0, values = (var_7730_cast_fp16_7, var_7708_cast_fp16_7))[name = tensor<string, []>("aw_1135_cast_fp16")];
+            tensor<string, []> aw_1137_equation_0 = const()[name = tensor<string, []>("aw_1137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1137_cast_fp16 = einsum(equation = aw_1137_equation_0, values = (var_7730_cast_fp16_8, var_7708_cast_fp16_8))[name = tensor<string, []>("aw_1137_cast_fp16")];
+            tensor<string, []> aw_1139_equation_0 = const()[name = tensor<string, []>("aw_1139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1139_cast_fp16 = einsum(equation = aw_1139_equation_0, values = (var_7730_cast_fp16_9, var_7708_cast_fp16_9))[name = tensor<string, []>("aw_1139_cast_fp16")];
+            tensor<string, []> aw_1141_equation_0 = const()[name = tensor<string, []>("aw_1141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1141_cast_fp16 = einsum(equation = aw_1141_equation_0, values = (var_7730_cast_fp16_10, var_7708_cast_fp16_10))[name = tensor<string, []>("aw_1141_cast_fp16")];
+            tensor<string, []> aw_1143_equation_0 = const()[name = tensor<string, []>("aw_1143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1143_cast_fp16 = einsum(equation = aw_1143_equation_0, values = (var_7730_cast_fp16_11, var_7708_cast_fp16_11))[name = tensor<string, []>("aw_1143_cast_fp16")];
+            tensor<string, []> aw_1145_equation_0 = const()[name = tensor<string, []>("aw_1145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1145_cast_fp16 = einsum(equation = aw_1145_equation_0, values = (var_7730_cast_fp16_12, var_7708_cast_fp16_12))[name = tensor<string, []>("aw_1145_cast_fp16")];
+            tensor<string, []> aw_1147_equation_0 = const()[name = tensor<string, []>("aw_1147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1147_cast_fp16 = einsum(equation = aw_1147_equation_0, values = (var_7730_cast_fp16_13, var_7708_cast_fp16_13))[name = tensor<string, []>("aw_1147_cast_fp16")];
+            tensor<string, []> aw_1149_equation_0 = const()[name = tensor<string, []>("aw_1149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1149_cast_fp16 = einsum(equation = aw_1149_equation_0, values = (var_7730_cast_fp16_14, var_7708_cast_fp16_14))[name = tensor<string, []>("aw_1149_cast_fp16")];
+            tensor<string, []> aw_1151_equation_0 = const()[name = tensor<string, []>("aw_1151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1151_cast_fp16 = einsum(equation = aw_1151_equation_0, values = (var_7730_cast_fp16_15, var_7708_cast_fp16_15))[name = tensor<string, []>("aw_1151_cast_fp16")];
+            tensor<string, []> aw_1153_equation_0 = const()[name = tensor<string, []>("aw_1153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1153_cast_fp16 = einsum(equation = aw_1153_equation_0, values = (var_7730_cast_fp16_16, var_7708_cast_fp16_16))[name = tensor<string, []>("aw_1153_cast_fp16")];
+            tensor<string, []> aw_1155_equation_0 = const()[name = tensor<string, []>("aw_1155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1155_cast_fp16 = einsum(equation = aw_1155_equation_0, values = (var_7730_cast_fp16_17, var_7708_cast_fp16_17))[name = tensor<string, []>("aw_1155_cast_fp16")];
+            tensor<string, []> aw_1157_equation_0 = const()[name = tensor<string, []>("aw_1157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1157_cast_fp16 = einsum(equation = aw_1157_equation_0, values = (var_7730_cast_fp16_18, var_7708_cast_fp16_18))[name = tensor<string, []>("aw_1157_cast_fp16")];
+            tensor<string, []> aw_1159_equation_0 = const()[name = tensor<string, []>("aw_1159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1159_cast_fp16 = einsum(equation = aw_1159_equation_0, values = (var_7730_cast_fp16_19, var_7708_cast_fp16_19))[name = tensor<string, []>("aw_1159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7812_cast_fp16 = softmax(axis = var_7656, x = aw_1121_cast_fp16)[name = tensor<string, []>("op_7812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7813_cast_fp16 = softmax(axis = var_7656, x = aw_1123_cast_fp16)[name = tensor<string, []>("op_7813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7814_cast_fp16 = softmax(axis = var_7656, x = aw_1125_cast_fp16)[name = tensor<string, []>("op_7814_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7815_cast_fp16 = softmax(axis = var_7656, x = aw_1127_cast_fp16)[name = tensor<string, []>("op_7815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7816_cast_fp16 = softmax(axis = var_7656, x = aw_1129_cast_fp16)[name = tensor<string, []>("op_7816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7817_cast_fp16 = softmax(axis = var_7656, x = aw_1131_cast_fp16)[name = tensor<string, []>("op_7817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7818_cast_fp16 = softmax(axis = var_7656, x = aw_1133_cast_fp16)[name = tensor<string, []>("op_7818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7819_cast_fp16 = softmax(axis = var_7656, x = aw_1135_cast_fp16)[name = tensor<string, []>("op_7819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7820_cast_fp16 = softmax(axis = var_7656, x = aw_1137_cast_fp16)[name = tensor<string, []>("op_7820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7821_cast_fp16 = softmax(axis = var_7656, x = aw_1139_cast_fp16)[name = tensor<string, []>("op_7821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7822_cast_fp16 = softmax(axis = var_7656, x = aw_1141_cast_fp16)[name = tensor<string, []>("op_7822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7823_cast_fp16 = softmax(axis = var_7656, x = aw_1143_cast_fp16)[name = tensor<string, []>("op_7823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7824_cast_fp16 = softmax(axis = var_7656, x = aw_1145_cast_fp16)[name = tensor<string, []>("op_7824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7825_cast_fp16 = softmax(axis = var_7656, x = aw_1147_cast_fp16)[name = tensor<string, []>("op_7825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7826_cast_fp16 = softmax(axis = var_7656, x = aw_1149_cast_fp16)[name = tensor<string, []>("op_7826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7827_cast_fp16 = softmax(axis = var_7656, x = aw_1151_cast_fp16)[name = tensor<string, []>("op_7827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7828_cast_fp16 = softmax(axis = var_7656, x = aw_1153_cast_fp16)[name = tensor<string, []>("op_7828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7829_cast_fp16 = softmax(axis = var_7656, x = aw_1155_cast_fp16)[name = tensor<string, []>("op_7829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7830_cast_fp16 = softmax(axis = var_7656, x = aw_1157_cast_fp16)[name = tensor<string, []>("op_7830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7831_cast_fp16 = softmax(axis = var_7656, x = aw_1159_cast_fp16)[name = tensor<string, []>("op_7831_cast_fp16")];
+            tensor<string, []> var_7833_equation_0 = const()[name = tensor<string, []>("op_7833_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7833_cast_fp16 = einsum(equation = var_7833_equation_0, values = (var_7751_cast_fp16_0, var_7812_cast_fp16))[name = tensor<string, []>("op_7833_cast_fp16")];
+            tensor<string, []> var_7835_equation_0 = const()[name = tensor<string, []>("op_7835_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7835_cast_fp16 = einsum(equation = var_7835_equation_0, values = (var_7751_cast_fp16_1, var_7813_cast_fp16))[name = tensor<string, []>("op_7835_cast_fp16")];
+            tensor<string, []> var_7837_equation_0 = const()[name = tensor<string, []>("op_7837_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7837_cast_fp16 = einsum(equation = var_7837_equation_0, values = (var_7751_cast_fp16_2, var_7814_cast_fp16))[name = tensor<string, []>("op_7837_cast_fp16")];
+            tensor<string, []> var_7839_equation_0 = const()[name = tensor<string, []>("op_7839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7839_cast_fp16 = einsum(equation = var_7839_equation_0, values = (var_7751_cast_fp16_3, var_7815_cast_fp16))[name = tensor<string, []>("op_7839_cast_fp16")];
+            tensor<string, []> var_7841_equation_0 = const()[name = tensor<string, []>("op_7841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7841_cast_fp16 = einsum(equation = var_7841_equation_0, values = (var_7751_cast_fp16_4, var_7816_cast_fp16))[name = tensor<string, []>("op_7841_cast_fp16")];
+            tensor<string, []> var_7843_equation_0 = const()[name = tensor<string, []>("op_7843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7843_cast_fp16 = einsum(equation = var_7843_equation_0, values = (var_7751_cast_fp16_5, var_7817_cast_fp16))[name = tensor<string, []>("op_7843_cast_fp16")];
+            tensor<string, []> var_7845_equation_0 = const()[name = tensor<string, []>("op_7845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7845_cast_fp16 = einsum(equation = var_7845_equation_0, values = (var_7751_cast_fp16_6, var_7818_cast_fp16))[name = tensor<string, []>("op_7845_cast_fp16")];
+            tensor<string, []> var_7847_equation_0 = const()[name = tensor<string, []>("op_7847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7847_cast_fp16 = einsum(equation = var_7847_equation_0, values = (var_7751_cast_fp16_7, var_7819_cast_fp16))[name = tensor<string, []>("op_7847_cast_fp16")];
+            tensor<string, []> var_7849_equation_0 = const()[name = tensor<string, []>("op_7849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7849_cast_fp16 = einsum(equation = var_7849_equation_0, values = (var_7751_cast_fp16_8, var_7820_cast_fp16))[name = tensor<string, []>("op_7849_cast_fp16")];
+            tensor<string, []> var_7851_equation_0 = const()[name = tensor<string, []>("op_7851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7851_cast_fp16 = einsum(equation = var_7851_equation_0, values = (var_7751_cast_fp16_9, var_7821_cast_fp16))[name = tensor<string, []>("op_7851_cast_fp16")];
+            tensor<string, []> var_7853_equation_0 = const()[name = tensor<string, []>("op_7853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7853_cast_fp16 = einsum(equation = var_7853_equation_0, values = (var_7751_cast_fp16_10, var_7822_cast_fp16))[name = tensor<string, []>("op_7853_cast_fp16")];
+            tensor<string, []> var_7855_equation_0 = const()[name = tensor<string, []>("op_7855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7855_cast_fp16 = einsum(equation = var_7855_equation_0, values = (var_7751_cast_fp16_11, var_7823_cast_fp16))[name = tensor<string, []>("op_7855_cast_fp16")];
+            tensor<string, []> var_7857_equation_0 = const()[name = tensor<string, []>("op_7857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7857_cast_fp16 = einsum(equation = var_7857_equation_0, values = (var_7751_cast_fp16_12, var_7824_cast_fp16))[name = tensor<string, []>("op_7857_cast_fp16")];
+            tensor<string, []> var_7859_equation_0 = const()[name = tensor<string, []>("op_7859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7859_cast_fp16 = einsum(equation = var_7859_equation_0, values = (var_7751_cast_fp16_13, var_7825_cast_fp16))[name = tensor<string, []>("op_7859_cast_fp16")];
+            tensor<string, []> var_7861_equation_0 = const()[name = tensor<string, []>("op_7861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7861_cast_fp16 = einsum(equation = var_7861_equation_0, values = (var_7751_cast_fp16_14, var_7826_cast_fp16))[name = tensor<string, []>("op_7861_cast_fp16")];
+            tensor<string, []> var_7863_equation_0 = const()[name = tensor<string, []>("op_7863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7863_cast_fp16 = einsum(equation = var_7863_equation_0, values = (var_7751_cast_fp16_15, var_7827_cast_fp16))[name = tensor<string, []>("op_7863_cast_fp16")];
+            tensor<string, []> var_7865_equation_0 = const()[name = tensor<string, []>("op_7865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7865_cast_fp16 = einsum(equation = var_7865_equation_0, values = (var_7751_cast_fp16_16, var_7828_cast_fp16))[name = tensor<string, []>("op_7865_cast_fp16")];
+            tensor<string, []> var_7867_equation_0 = const()[name = tensor<string, []>("op_7867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7867_cast_fp16 = einsum(equation = var_7867_equation_0, values = (var_7751_cast_fp16_17, var_7829_cast_fp16))[name = tensor<string, []>("op_7867_cast_fp16")];
+            tensor<string, []> var_7869_equation_0 = const()[name = tensor<string, []>("op_7869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7869_cast_fp16 = einsum(equation = var_7869_equation_0, values = (var_7751_cast_fp16_18, var_7830_cast_fp16))[name = tensor<string, []>("op_7869_cast_fp16")];
+            tensor<string, []> var_7871_equation_0 = const()[name = tensor<string, []>("op_7871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7871_cast_fp16 = einsum(equation = var_7871_equation_0, values = (var_7751_cast_fp16_19, var_7831_cast_fp16))[name = tensor<string, []>("op_7871_cast_fp16")];
+            tensor<bool, []> input_285_interleave_0 = const()[name = tensor<string, []>("input_285_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_285_cast_fp16 = concat(axis = var_7656, interleave = input_285_interleave_0, values = (var_7833_cast_fp16, var_7835_cast_fp16, var_7837_cast_fp16, var_7839_cast_fp16, var_7841_cast_fp16, var_7843_cast_fp16, var_7845_cast_fp16, var_7847_cast_fp16, var_7849_cast_fp16, var_7851_cast_fp16, var_7853_cast_fp16, var_7855_cast_fp16, var_7857_cast_fp16, var_7859_cast_fp16, var_7861_cast_fp16, var_7863_cast_fp16, var_7865_cast_fp16, var_7867_cast_fp16, var_7869_cast_fp16, var_7871_cast_fp16))[name = tensor<string, []>("input_285_cast_fp16")];
+            tensor<string, []> var_7880_pad_type_0 = const()[name = tensor<string, []>("op_7880_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7880_strides_0 = const()[name = tensor<string, []>("op_7880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7880_pad_0 = const()[name = tensor<string, []>("op_7880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7880_dilations_0 = const()[name = tensor<string, []>("op_7880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7880_groups_0 = const()[name = tensor<string, []>("op_7880_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126023232)))];
+            tensor<fp16, [1280]> blocks_28_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129300096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7880_cast_fp16 = conv(bias = blocks_28_attn_out_bias_to_fp16, dilations = var_7880_dilations_0, groups = var_7880_groups_0, pad = var_7880_pad_0, pad_type = var_7880_pad_type_0, strides = var_7880_strides_0, weight = blocks_28_attn_out_weight_to_fp16, x = input_285_cast_fp16)[name = tensor<string, []>("op_7880_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = var_7880_cast_fp16)[name = tensor<string, []>("inputs_115_cast_fp16")];
+            tensor<int32, [1]> input_287_axes_0 = const()[name = tensor<string, []>("input_287_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_287_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_287_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129302720)))];
+            tensor<fp16, [1280]> input_287_beta_0_to_fp16 = const()[name = tensor<string, []>("input_287_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129305344)))];
+            tensor<fp16, []> var_7890_to_fp16 = const()[name = tensor<string, []>("op_7890_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_287_cast_fp16 = layer_norm(axes = input_287_axes_0, beta = input_287_beta_0_to_fp16, epsilon = var_7890_to_fp16, gamma = input_287_gamma_0_to_fp16, x = inputs_115_cast_fp16)[name = tensor<string, []>("input_287_cast_fp16")];
+            tensor<string, []> input_289_pad_type_0 = const()[name = tensor<string, []>("input_289_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_289_strides_0 = const()[name = tensor<string, []>("input_289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_289_pad_0 = const()[name = tensor<string, []>("input_289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_289_dilations_0 = const()[name = tensor<string, []>("input_289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_289_groups_0 = const()[name = tensor<string, []>("input_289_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_28_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129307968)))];
+            tensor<fp16, [5120]> blocks_28_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142415232)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_289_cast_fp16 = conv(bias = blocks_28_mlp_0_bias_to_fp16, dilations = input_289_dilations_0, groups = input_289_groups_0, pad = input_289_pad_0, pad_type = input_289_pad_type_0, strides = input_289_strides_0, weight = blocks_28_mlp_0_weight_to_fp16, x = input_287_cast_fp16)[name = tensor<string, []>("input_289_cast_fp16")];
+            tensor<string, []> input_291_mode_0 = const()[name = tensor<string, []>("input_291_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_291_cast_fp16 = gelu(mode = input_291_mode_0, x = input_289_cast_fp16)[name = tensor<string, []>("input_291_cast_fp16")];
+            tensor<string, []> var_7916_pad_type_0 = const()[name = tensor<string, []>("op_7916_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7916_strides_0 = const()[name = tensor<string, []>("op_7916_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7916_pad_0 = const()[name = tensor<string, []>("op_7916_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7916_dilations_0 = const()[name = tensor<string, []>("op_7916_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7916_groups_0 = const()[name = tensor<string, []>("op_7916_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_28_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142425536)))];
+            tensor<fp16, [1280]> blocks_28_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155532800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7916_cast_fp16 = conv(bias = blocks_28_mlp_2_bias_to_fp16, dilations = var_7916_dilations_0, groups = var_7916_groups_0, pad = var_7916_pad_0, pad_type = var_7916_pad_type_0, strides = var_7916_strides_0, weight = blocks_28_mlp_2_weight_to_fp16, x = input_291_cast_fp16)[name = tensor<string, []>("op_7916_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = var_7916_cast_fp16)[name = tensor<string, []>("inputs_117_cast_fp16")];
+            tensor<int32, []> var_7925 = const()[name = tensor<string, []>("op_7925"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_293_axes_0 = const()[name = tensor<string, []>("input_293_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_293_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_293_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155535424)))];
+            tensor<fp16, [1280]> input_293_beta_0_to_fp16 = const()[name = tensor<string, []>("input_293_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155538048)))];
+            tensor<fp16, []> var_7941_to_fp16 = const()[name = tensor<string, []>("op_7941_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_293_cast_fp16 = layer_norm(axes = input_293_axes_0, beta = input_293_beta_0_to_fp16, epsilon = var_7941_to_fp16, gamma = input_293_gamma_0_to_fp16, x = inputs_117_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
+            tensor<string, []> q_59_pad_type_0 = const()[name = tensor<string, []>("q_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_59_strides_0 = const()[name = tensor<string, []>("q_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_59_pad_0 = const()[name = tensor<string, []>("q_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_59_dilations_0 = const()[name = tensor<string, []>("q_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_59_groups_0 = const()[name = tensor<string, []>("q_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7976_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7976_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155540672)))];
+            tensor<fp16, [1280]> var_7976_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7976_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1158817536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7976_cast_fp16 = conv(bias = var_7976_bias_0_to_fp16, dilations = q_59_dilations_0, groups = q_59_groups_0, pad = q_59_pad_0, pad_type = q_59_pad_type_0, strides = q_59_strides_0, weight = var_7976_weight_0_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7976_cast_fp16")];
+            tensor<string, []> k_59_pad_type_0 = const()[name = tensor<string, []>("k_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_59_strides_0 = const()[name = tensor<string, []>("k_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_59_pad_0 = const()[name = tensor<string, []>("k_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_59_dilations_0 = const()[name = tensor<string, []>("k_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_59_groups_0 = const()[name = tensor<string, []>("k_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1158820160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_59_cast_fp16 = conv(dilations = k_59_dilations_0, groups = k_59_groups_0, pad = k_59_pad_0, pad_type = k_59_pad_type_0, strides = k_59_strides_0, weight = blocks_29_attn_key_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("k_59_cast_fp16")];
+            tensor<string, []> var_7974_pad_type_0 = const()[name = tensor<string, []>("op_7974_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7974_strides_0 = const()[name = tensor<string, []>("op_7974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7974_pad_0 = const()[name = tensor<string, []>("op_7974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7974_dilations_0 = const()[name = tensor<string, []>("op_7974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7974_groups_0 = const()[name = tensor<string, []>("op_7974_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1162097024)))];
+            tensor<fp16, [1280]> blocks_29_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165373888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7974_cast_fp16 = conv(bias = blocks_29_attn_value_bias_to_fp16, dilations = var_7974_dilations_0, groups = var_7974_groups_0, pad = var_7974_pad_0, pad_type = var_7974_pad_type_0, strides = var_7974_strides_0, weight = blocks_29_attn_value_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7974_cast_fp16")];
+            tensor<int32, [20]> tile_87 = const()[name = tensor<string, []>("tile_87"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7977_axis_0 = const()[name = tensor<string, []>("op_7977_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_19 = split(axis = var_7977_axis_0, split_sizes = tile_87, x = var_7976_cast_fp16)[name = tensor<string, []>("op_7977_cast_fp16")];
+            tensor<int32, [4]> var_7998_perm_0 = const()[name = tensor<string, []>("op_7998_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_88 = const()[name = tensor<string, []>("tile_88"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7999_axis_0 = const()[name = tensor<string, []>("op_7999_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7998_cast_fp16 = transpose(perm = var_7998_perm_0, x = k_59_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_19 = split(axis = var_7999_axis_0, split_sizes = tile_88, x = var_7998_cast_fp16)[name = tensor<string, []>("op_7999_cast_fp16")];
+            tensor<int32, [20]> tile_89 = const()[name = tensor<string, []>("tile_89"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8020_axis_0 = const()[name = tensor<string, []>("op_8020_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_19 = split(axis = var_8020_axis_0, split_sizes = tile_89, x = var_7974_cast_fp16)[name = tensor<string, []>("op_8020_cast_fp16")];
+            tensor<string, []> aw_1161_equation_0 = const()[name = tensor<string, []>("aw_1161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1161_cast_fp16 = einsum(equation = aw_1161_equation_0, values = (var_7999_cast_fp16_0, var_7977_cast_fp16_0))[name = tensor<string, []>("aw_1161_cast_fp16")];
+            tensor<string, []> aw_1163_equation_0 = const()[name = tensor<string, []>("aw_1163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1163_cast_fp16 = einsum(equation = aw_1163_equation_0, values = (var_7999_cast_fp16_1, var_7977_cast_fp16_1))[name = tensor<string, []>("aw_1163_cast_fp16")];
+            tensor<string, []> aw_1165_equation_0 = const()[name = tensor<string, []>("aw_1165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1165_cast_fp16 = einsum(equation = aw_1165_equation_0, values = (var_7999_cast_fp16_2, var_7977_cast_fp16_2))[name = tensor<string, []>("aw_1165_cast_fp16")];
+            tensor<string, []> aw_1167_equation_0 = const()[name = tensor<string, []>("aw_1167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1167_cast_fp16 = einsum(equation = aw_1167_equation_0, values = (var_7999_cast_fp16_3, var_7977_cast_fp16_3))[name = tensor<string, []>("aw_1167_cast_fp16")];
+            tensor<string, []> aw_1169_equation_0 = const()[name = tensor<string, []>("aw_1169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1169_cast_fp16 = einsum(equation = aw_1169_equation_0, values = (var_7999_cast_fp16_4, var_7977_cast_fp16_4))[name = tensor<string, []>("aw_1169_cast_fp16")];
+            tensor<string, []> aw_1171_equation_0 = const()[name = tensor<string, []>("aw_1171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1171_cast_fp16 = einsum(equation = aw_1171_equation_0, values = (var_7999_cast_fp16_5, var_7977_cast_fp16_5))[name = tensor<string, []>("aw_1171_cast_fp16")];
+            tensor<string, []> aw_1173_equation_0 = const()[name = tensor<string, []>("aw_1173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1173_cast_fp16 = einsum(equation = aw_1173_equation_0, values = (var_7999_cast_fp16_6, var_7977_cast_fp16_6))[name = tensor<string, []>("aw_1173_cast_fp16")];
+            tensor<string, []> aw_1175_equation_0 = const()[name = tensor<string, []>("aw_1175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1175_cast_fp16 = einsum(equation = aw_1175_equation_0, values = (var_7999_cast_fp16_7, var_7977_cast_fp16_7))[name = tensor<string, []>("aw_1175_cast_fp16")];
+            tensor<string, []> aw_1177_equation_0 = const()[name = tensor<string, []>("aw_1177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1177_cast_fp16 = einsum(equation = aw_1177_equation_0, values = (var_7999_cast_fp16_8, var_7977_cast_fp16_8))[name = tensor<string, []>("aw_1177_cast_fp16")];
+            tensor<string, []> aw_1179_equation_0 = const()[name = tensor<string, []>("aw_1179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1179_cast_fp16 = einsum(equation = aw_1179_equation_0, values = (var_7999_cast_fp16_9, var_7977_cast_fp16_9))[name = tensor<string, []>("aw_1179_cast_fp16")];
+            tensor<string, []> aw_1181_equation_0 = const()[name = tensor<string, []>("aw_1181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1181_cast_fp16 = einsum(equation = aw_1181_equation_0, values = (var_7999_cast_fp16_10, var_7977_cast_fp16_10))[name = tensor<string, []>("aw_1181_cast_fp16")];
+            tensor<string, []> aw_1183_equation_0 = const()[name = tensor<string, []>("aw_1183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1183_cast_fp16 = einsum(equation = aw_1183_equation_0, values = (var_7999_cast_fp16_11, var_7977_cast_fp16_11))[name = tensor<string, []>("aw_1183_cast_fp16")];
+            tensor<string, []> aw_1185_equation_0 = const()[name = tensor<string, []>("aw_1185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1185_cast_fp16 = einsum(equation = aw_1185_equation_0, values = (var_7999_cast_fp16_12, var_7977_cast_fp16_12))[name = tensor<string, []>("aw_1185_cast_fp16")];
+            tensor<string, []> aw_1187_equation_0 = const()[name = tensor<string, []>("aw_1187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1187_cast_fp16 = einsum(equation = aw_1187_equation_0, values = (var_7999_cast_fp16_13, var_7977_cast_fp16_13))[name = tensor<string, []>("aw_1187_cast_fp16")];
+            tensor<string, []> aw_1189_equation_0 = const()[name = tensor<string, []>("aw_1189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1189_cast_fp16 = einsum(equation = aw_1189_equation_0, values = (var_7999_cast_fp16_14, var_7977_cast_fp16_14))[name = tensor<string, []>("aw_1189_cast_fp16")];
+            tensor<string, []> aw_1191_equation_0 = const()[name = tensor<string, []>("aw_1191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1191_cast_fp16 = einsum(equation = aw_1191_equation_0, values = (var_7999_cast_fp16_15, var_7977_cast_fp16_15))[name = tensor<string, []>("aw_1191_cast_fp16")];
+            tensor<string, []> aw_1193_equation_0 = const()[name = tensor<string, []>("aw_1193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1193_cast_fp16 = einsum(equation = aw_1193_equation_0, values = (var_7999_cast_fp16_16, var_7977_cast_fp16_16))[name = tensor<string, []>("aw_1193_cast_fp16")];
+            tensor<string, []> aw_1195_equation_0 = const()[name = tensor<string, []>("aw_1195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1195_cast_fp16 = einsum(equation = aw_1195_equation_0, values = (var_7999_cast_fp16_17, var_7977_cast_fp16_17))[name = tensor<string, []>("aw_1195_cast_fp16")];
+            tensor<string, []> aw_1197_equation_0 = const()[name = tensor<string, []>("aw_1197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1197_cast_fp16 = einsum(equation = aw_1197_equation_0, values = (var_7999_cast_fp16_18, var_7977_cast_fp16_18))[name = tensor<string, []>("aw_1197_cast_fp16")];
+            tensor<string, []> aw_1199_equation_0 = const()[name = tensor<string, []>("aw_1199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1199_cast_fp16 = einsum(equation = aw_1199_equation_0, values = (var_7999_cast_fp16_19, var_7977_cast_fp16_19))[name = tensor<string, []>("aw_1199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8081_cast_fp16 = softmax(axis = var_7925, x = aw_1161_cast_fp16)[name = tensor<string, []>("op_8081_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8082_cast_fp16 = softmax(axis = var_7925, x = aw_1163_cast_fp16)[name = tensor<string, []>("op_8082_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8083_cast_fp16 = softmax(axis = var_7925, x = aw_1165_cast_fp16)[name = tensor<string, []>("op_8083_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8084_cast_fp16 = softmax(axis = var_7925, x = aw_1167_cast_fp16)[name = tensor<string, []>("op_8084_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8085_cast_fp16 = softmax(axis = var_7925, x = aw_1169_cast_fp16)[name = tensor<string, []>("op_8085_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8086_cast_fp16 = softmax(axis = var_7925, x = aw_1171_cast_fp16)[name = tensor<string, []>("op_8086_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8087_cast_fp16 = softmax(axis = var_7925, x = aw_1173_cast_fp16)[name = tensor<string, []>("op_8087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8088_cast_fp16 = softmax(axis = var_7925, x = aw_1175_cast_fp16)[name = tensor<string, []>("op_8088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8089_cast_fp16 = softmax(axis = var_7925, x = aw_1177_cast_fp16)[name = tensor<string, []>("op_8089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8090_cast_fp16 = softmax(axis = var_7925, x = aw_1179_cast_fp16)[name = tensor<string, []>("op_8090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8091_cast_fp16 = softmax(axis = var_7925, x = aw_1181_cast_fp16)[name = tensor<string, []>("op_8091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8092_cast_fp16 = softmax(axis = var_7925, x = aw_1183_cast_fp16)[name = tensor<string, []>("op_8092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8093_cast_fp16 = softmax(axis = var_7925, x = aw_1185_cast_fp16)[name = tensor<string, []>("op_8093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8094_cast_fp16 = softmax(axis = var_7925, x = aw_1187_cast_fp16)[name = tensor<string, []>("op_8094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8095_cast_fp16 = softmax(axis = var_7925, x = aw_1189_cast_fp16)[name = tensor<string, []>("op_8095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8096_cast_fp16 = softmax(axis = var_7925, x = aw_1191_cast_fp16)[name = tensor<string, []>("op_8096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8097_cast_fp16 = softmax(axis = var_7925, x = aw_1193_cast_fp16)[name = tensor<string, []>("op_8097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8098_cast_fp16 = softmax(axis = var_7925, x = aw_1195_cast_fp16)[name = tensor<string, []>("op_8098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8099_cast_fp16 = softmax(axis = var_7925, x = aw_1197_cast_fp16)[name = tensor<string, []>("op_8099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8100_cast_fp16 = softmax(axis = var_7925, x = aw_1199_cast_fp16)[name = tensor<string, []>("op_8100_cast_fp16")];
+            tensor<string, []> var_8102_equation_0 = const()[name = tensor<string, []>("op_8102_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8102_cast_fp16 = einsum(equation = var_8102_equation_0, values = (var_8020_cast_fp16_0, var_8081_cast_fp16))[name = tensor<string, []>("op_8102_cast_fp16")];
+            tensor<string, []> var_8104_equation_0 = const()[name = tensor<string, []>("op_8104_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8104_cast_fp16 = einsum(equation = var_8104_equation_0, values = (var_8020_cast_fp16_1, var_8082_cast_fp16))[name = tensor<string, []>("op_8104_cast_fp16")];
+            tensor<string, []> var_8106_equation_0 = const()[name = tensor<string, []>("op_8106_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8106_cast_fp16 = einsum(equation = var_8106_equation_0, values = (var_8020_cast_fp16_2, var_8083_cast_fp16))[name = tensor<string, []>("op_8106_cast_fp16")];
+            tensor<string, []> var_8108_equation_0 = const()[name = tensor<string, []>("op_8108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8108_cast_fp16 = einsum(equation = var_8108_equation_0, values = (var_8020_cast_fp16_3, var_8084_cast_fp16))[name = tensor<string, []>("op_8108_cast_fp16")];
+            tensor<string, []> var_8110_equation_0 = const()[name = tensor<string, []>("op_8110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8110_cast_fp16 = einsum(equation = var_8110_equation_0, values = (var_8020_cast_fp16_4, var_8085_cast_fp16))[name = tensor<string, []>("op_8110_cast_fp16")];
+            tensor<string, []> var_8112_equation_0 = const()[name = tensor<string, []>("op_8112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8112_cast_fp16 = einsum(equation = var_8112_equation_0, values = (var_8020_cast_fp16_5, var_8086_cast_fp16))[name = tensor<string, []>("op_8112_cast_fp16")];
+            tensor<string, []> var_8114_equation_0 = const()[name = tensor<string, []>("op_8114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8114_cast_fp16 = einsum(equation = var_8114_equation_0, values = (var_8020_cast_fp16_6, var_8087_cast_fp16))[name = tensor<string, []>("op_8114_cast_fp16")];
+            tensor<string, []> var_8116_equation_0 = const()[name = tensor<string, []>("op_8116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8116_cast_fp16 = einsum(equation = var_8116_equation_0, values = (var_8020_cast_fp16_7, var_8088_cast_fp16))[name = tensor<string, []>("op_8116_cast_fp16")];
+            tensor<string, []> var_8118_equation_0 = const()[name = tensor<string, []>("op_8118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8118_cast_fp16 = einsum(equation = var_8118_equation_0, values = (var_8020_cast_fp16_8, var_8089_cast_fp16))[name = tensor<string, []>("op_8118_cast_fp16")];
+            tensor<string, []> var_8120_equation_0 = const()[name = tensor<string, []>("op_8120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8120_cast_fp16 = einsum(equation = var_8120_equation_0, values = (var_8020_cast_fp16_9, var_8090_cast_fp16))[name = tensor<string, []>("op_8120_cast_fp16")];
+            tensor<string, []> var_8122_equation_0 = const()[name = tensor<string, []>("op_8122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8122_cast_fp16 = einsum(equation = var_8122_equation_0, values = (var_8020_cast_fp16_10, var_8091_cast_fp16))[name = tensor<string, []>("op_8122_cast_fp16")];
+            tensor<string, []> var_8124_equation_0 = const()[name = tensor<string, []>("op_8124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8124_cast_fp16 = einsum(equation = var_8124_equation_0, values = (var_8020_cast_fp16_11, var_8092_cast_fp16))[name = tensor<string, []>("op_8124_cast_fp16")];
+            tensor<string, []> var_8126_equation_0 = const()[name = tensor<string, []>("op_8126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8126_cast_fp16 = einsum(equation = var_8126_equation_0, values = (var_8020_cast_fp16_12, var_8093_cast_fp16))[name = tensor<string, []>("op_8126_cast_fp16")];
+            tensor<string, []> var_8128_equation_0 = const()[name = tensor<string, []>("op_8128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8128_cast_fp16 = einsum(equation = var_8128_equation_0, values = (var_8020_cast_fp16_13, var_8094_cast_fp16))[name = tensor<string, []>("op_8128_cast_fp16")];
+            tensor<string, []> var_8130_equation_0 = const()[name = tensor<string, []>("op_8130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8130_cast_fp16 = einsum(equation = var_8130_equation_0, values = (var_8020_cast_fp16_14, var_8095_cast_fp16))[name = tensor<string, []>("op_8130_cast_fp16")];
+            tensor<string, []> var_8132_equation_0 = const()[name = tensor<string, []>("op_8132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8132_cast_fp16 = einsum(equation = var_8132_equation_0, values = (var_8020_cast_fp16_15, var_8096_cast_fp16))[name = tensor<string, []>("op_8132_cast_fp16")];
+            tensor<string, []> var_8134_equation_0 = const()[name = tensor<string, []>("op_8134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8134_cast_fp16 = einsum(equation = var_8134_equation_0, values = (var_8020_cast_fp16_16, var_8097_cast_fp16))[name = tensor<string, []>("op_8134_cast_fp16")];
+            tensor<string, []> var_8136_equation_0 = const()[name = tensor<string, []>("op_8136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8136_cast_fp16 = einsum(equation = var_8136_equation_0, values = (var_8020_cast_fp16_17, var_8098_cast_fp16))[name = tensor<string, []>("op_8136_cast_fp16")];
+            tensor<string, []> var_8138_equation_0 = const()[name = tensor<string, []>("op_8138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8138_cast_fp16 = einsum(equation = var_8138_equation_0, values = (var_8020_cast_fp16_18, var_8099_cast_fp16))[name = tensor<string, []>("op_8138_cast_fp16")];
+            tensor<string, []> var_8140_equation_0 = const()[name = tensor<string, []>("op_8140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8140_cast_fp16 = einsum(equation = var_8140_equation_0, values = (var_8020_cast_fp16_19, var_8100_cast_fp16))[name = tensor<string, []>("op_8140_cast_fp16")];
+            tensor<bool, []> input_295_interleave_0 = const()[name = tensor<string, []>("input_295_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_295_cast_fp16 = concat(axis = var_7925, interleave = input_295_interleave_0, values = (var_8102_cast_fp16, var_8104_cast_fp16, var_8106_cast_fp16, var_8108_cast_fp16, var_8110_cast_fp16, var_8112_cast_fp16, var_8114_cast_fp16, var_8116_cast_fp16, var_8118_cast_fp16, var_8120_cast_fp16, var_8122_cast_fp16, var_8124_cast_fp16, var_8126_cast_fp16, var_8128_cast_fp16, var_8130_cast_fp16, var_8132_cast_fp16, var_8134_cast_fp16, var_8136_cast_fp16, var_8138_cast_fp16, var_8140_cast_fp16))[name = tensor<string, []>("input_295_cast_fp16")];
+            tensor<string, []> var_8149_pad_type_0 = const()[name = tensor<string, []>("op_8149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8149_strides_0 = const()[name = tensor<string, []>("op_8149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8149_pad_0 = const()[name = tensor<string, []>("op_8149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8149_dilations_0 = const()[name = tensor<string, []>("op_8149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8149_groups_0 = const()[name = tensor<string, []>("op_8149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165376512)))];
+            tensor<fp16, [1280]> blocks_29_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168653376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8149_cast_fp16 = conv(bias = blocks_29_attn_out_bias_to_fp16, dilations = var_8149_dilations_0, groups = var_8149_groups_0, pad = var_8149_pad_0, pad_type = var_8149_pad_type_0, strides = var_8149_strides_0, weight = blocks_29_attn_out_weight_to_fp16, x = input_295_cast_fp16)[name = tensor<string, []>("op_8149_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = var_8149_cast_fp16)[name = tensor<string, []>("inputs_119_cast_fp16")];
+            tensor<int32, [1]> input_297_axes_0 = const()[name = tensor<string, []>("input_297_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_297_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_297_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168656000)))];
+            tensor<fp16, [1280]> input_297_beta_0_to_fp16 = const()[name = tensor<string, []>("input_297_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168658624)))];
+            tensor<fp16, []> var_8159_to_fp16 = const()[name = tensor<string, []>("op_8159_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_297_cast_fp16 = layer_norm(axes = input_297_axes_0, beta = input_297_beta_0_to_fp16, epsilon = var_8159_to_fp16, gamma = input_297_gamma_0_to_fp16, x = inputs_119_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
+            tensor<string, []> input_299_pad_type_0 = const()[name = tensor<string, []>("input_299_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_299_strides_0 = const()[name = tensor<string, []>("input_299_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_299_pad_0 = const()[name = tensor<string, []>("input_299_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_299_dilations_0 = const()[name = tensor<string, []>("input_299_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_299_groups_0 = const()[name = tensor<string, []>("input_299_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_29_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1168661248)))];
+            tensor<fp16, [5120]> blocks_29_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1181768512)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_299_cast_fp16 = conv(bias = blocks_29_mlp_0_bias_to_fp16, dilations = input_299_dilations_0, groups = input_299_groups_0, pad = input_299_pad_0, pad_type = input_299_pad_type_0, strides = input_299_strides_0, weight = blocks_29_mlp_0_weight_to_fp16, x = input_297_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
+            tensor<string, []> input_301_mode_0 = const()[name = tensor<string, []>("input_301_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_301_cast_fp16 = gelu(mode = input_301_mode_0, x = input_299_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
+            tensor<string, []> var_8185_pad_type_0 = const()[name = tensor<string, []>("op_8185_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8185_strides_0 = const()[name = tensor<string, []>("op_8185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8185_pad_0 = const()[name = tensor<string, []>("op_8185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8185_dilations_0 = const()[name = tensor<string, []>("op_8185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8185_groups_0 = const()[name = tensor<string, []>("op_8185_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_29_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1181778816)))];
+            tensor<fp16, [1280]> blocks_29_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194886080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8185_cast_fp16 = conv(bias = blocks_29_mlp_2_bias_to_fp16, dilations = var_8185_dilations_0, groups = var_8185_groups_0, pad = var_8185_pad_0, pad_type = var_8185_pad_type_0, strides = var_8185_strides_0, weight = blocks_29_mlp_2_weight_to_fp16, x = input_301_cast_fp16)[name = tensor<string, []>("op_8185_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = var_8185_cast_fp16)[name = tensor<string, []>("inputs_121_cast_fp16")];
+            tensor<int32, []> var_8194 = const()[name = tensor<string, []>("op_8194"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_303_axes_0 = const()[name = tensor<string, []>("input_303_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_303_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_303_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194888704)))];
+            tensor<fp16, [1280]> input_303_beta_0_to_fp16 = const()[name = tensor<string, []>("input_303_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194891328)))];
+            tensor<fp16, []> var_8210_to_fp16 = const()[name = tensor<string, []>("op_8210_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_303_cast_fp16 = layer_norm(axes = input_303_axes_0, beta = input_303_beta_0_to_fp16, epsilon = var_8210_to_fp16, gamma = input_303_gamma_0_to_fp16, x = inputs_121_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
+            tensor<string, []> q_61_pad_type_0 = const()[name = tensor<string, []>("q_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_61_strides_0 = const()[name = tensor<string, []>("q_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_61_pad_0 = const()[name = tensor<string, []>("q_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_61_dilations_0 = const()[name = tensor<string, []>("q_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_61_groups_0 = const()[name = tensor<string, []>("q_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8245_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8245_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1194893952)))];
+            tensor<fp16, [1280]> var_8245_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8245_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198170816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8245_cast_fp16 = conv(bias = var_8245_bias_0_to_fp16, dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = var_8245_weight_0_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8245_cast_fp16")];
+            tensor<string, []> k_61_pad_type_0 = const()[name = tensor<string, []>("k_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_61_strides_0 = const()[name = tensor<string, []>("k_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_61_pad_0 = const()[name = tensor<string, []>("k_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_61_dilations_0 = const()[name = tensor<string, []>("k_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_61_groups_0 = const()[name = tensor<string, []>("k_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198173440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_61_cast_fp16 = conv(dilations = k_61_dilations_0, groups = k_61_groups_0, pad = k_61_pad_0, pad_type = k_61_pad_type_0, strides = k_61_strides_0, weight = blocks_30_attn_key_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("k_61_cast_fp16")];
+            tensor<string, []> var_8243_pad_type_0 = const()[name = tensor<string, []>("op_8243_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8243_strides_0 = const()[name = tensor<string, []>("op_8243_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8243_pad_0 = const()[name = tensor<string, []>("op_8243_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8243_dilations_0 = const()[name = tensor<string, []>("op_8243_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8243_groups_0 = const()[name = tensor<string, []>("op_8243_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1201450304)))];
+            tensor<fp16, [1280]> blocks_30_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1204727168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8243_cast_fp16 = conv(bias = blocks_30_attn_value_bias_to_fp16, dilations = var_8243_dilations_0, groups = var_8243_groups_0, pad = var_8243_pad_0, pad_type = var_8243_pad_type_0, strides = var_8243_strides_0, weight = blocks_30_attn_value_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8243_cast_fp16")];
+            tensor<int32, [20]> tile_90 = const()[name = tensor<string, []>("tile_90"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8246_axis_0 = const()[name = tensor<string, []>("op_8246_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_19 = split(axis = var_8246_axis_0, split_sizes = tile_90, x = var_8245_cast_fp16)[name = tensor<string, []>("op_8246_cast_fp16")];
+            tensor<int32, [4]> var_8267_perm_0 = const()[name = tensor<string, []>("op_8267_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_91 = const()[name = tensor<string, []>("tile_91"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8268_axis_0 = const()[name = tensor<string, []>("op_8268_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8267_cast_fp16 = transpose(perm = var_8267_perm_0, x = k_61_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_19 = split(axis = var_8268_axis_0, split_sizes = tile_91, x = var_8267_cast_fp16)[name = tensor<string, []>("op_8268_cast_fp16")];
+            tensor<int32, [20]> tile_92 = const()[name = tensor<string, []>("tile_92"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8289_axis_0 = const()[name = tensor<string, []>("op_8289_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_19 = split(axis = var_8289_axis_0, split_sizes = tile_92, x = var_8243_cast_fp16)[name = tensor<string, []>("op_8289_cast_fp16")];
+            tensor<string, []> aw_1201_equation_0 = const()[name = tensor<string, []>("aw_1201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1201_cast_fp16 = einsum(equation = aw_1201_equation_0, values = (var_8268_cast_fp16_0, var_8246_cast_fp16_0))[name = tensor<string, []>("aw_1201_cast_fp16")];
+            tensor<string, []> aw_1203_equation_0 = const()[name = tensor<string, []>("aw_1203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1203_cast_fp16 = einsum(equation = aw_1203_equation_0, values = (var_8268_cast_fp16_1, var_8246_cast_fp16_1))[name = tensor<string, []>("aw_1203_cast_fp16")];
+            tensor<string, []> aw_1205_equation_0 = const()[name = tensor<string, []>("aw_1205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1205_cast_fp16 = einsum(equation = aw_1205_equation_0, values = (var_8268_cast_fp16_2, var_8246_cast_fp16_2))[name = tensor<string, []>("aw_1205_cast_fp16")];
+            tensor<string, []> aw_1207_equation_0 = const()[name = tensor<string, []>("aw_1207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1207_cast_fp16 = einsum(equation = aw_1207_equation_0, values = (var_8268_cast_fp16_3, var_8246_cast_fp16_3))[name = tensor<string, []>("aw_1207_cast_fp16")];
+            tensor<string, []> aw_1209_equation_0 = const()[name = tensor<string, []>("aw_1209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1209_cast_fp16 = einsum(equation = aw_1209_equation_0, values = (var_8268_cast_fp16_4, var_8246_cast_fp16_4))[name = tensor<string, []>("aw_1209_cast_fp16")];
+            tensor<string, []> aw_1211_equation_0 = const()[name = tensor<string, []>("aw_1211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1211_cast_fp16 = einsum(equation = aw_1211_equation_0, values = (var_8268_cast_fp16_5, var_8246_cast_fp16_5))[name = tensor<string, []>("aw_1211_cast_fp16")];
+            tensor<string, []> aw_1213_equation_0 = const()[name = tensor<string, []>("aw_1213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1213_cast_fp16 = einsum(equation = aw_1213_equation_0, values = (var_8268_cast_fp16_6, var_8246_cast_fp16_6))[name = tensor<string, []>("aw_1213_cast_fp16")];
+            tensor<string, []> aw_1215_equation_0 = const()[name = tensor<string, []>("aw_1215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1215_cast_fp16 = einsum(equation = aw_1215_equation_0, values = (var_8268_cast_fp16_7, var_8246_cast_fp16_7))[name = tensor<string, []>("aw_1215_cast_fp16")];
+            tensor<string, []> aw_1217_equation_0 = const()[name = tensor<string, []>("aw_1217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1217_cast_fp16 = einsum(equation = aw_1217_equation_0, values = (var_8268_cast_fp16_8, var_8246_cast_fp16_8))[name = tensor<string, []>("aw_1217_cast_fp16")];
+            tensor<string, []> aw_1219_equation_0 = const()[name = tensor<string, []>("aw_1219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1219_cast_fp16 = einsum(equation = aw_1219_equation_0, values = (var_8268_cast_fp16_9, var_8246_cast_fp16_9))[name = tensor<string, []>("aw_1219_cast_fp16")];
+            tensor<string, []> aw_1221_equation_0 = const()[name = tensor<string, []>("aw_1221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1221_cast_fp16 = einsum(equation = aw_1221_equation_0, values = (var_8268_cast_fp16_10, var_8246_cast_fp16_10))[name = tensor<string, []>("aw_1221_cast_fp16")];
+            tensor<string, []> aw_1223_equation_0 = const()[name = tensor<string, []>("aw_1223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1223_cast_fp16 = einsum(equation = aw_1223_equation_0, values = (var_8268_cast_fp16_11, var_8246_cast_fp16_11))[name = tensor<string, []>("aw_1223_cast_fp16")];
+            tensor<string, []> aw_1225_equation_0 = const()[name = tensor<string, []>("aw_1225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1225_cast_fp16 = einsum(equation = aw_1225_equation_0, values = (var_8268_cast_fp16_12, var_8246_cast_fp16_12))[name = tensor<string, []>("aw_1225_cast_fp16")];
+            tensor<string, []> aw_1227_equation_0 = const()[name = tensor<string, []>("aw_1227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1227_cast_fp16 = einsum(equation = aw_1227_equation_0, values = (var_8268_cast_fp16_13, var_8246_cast_fp16_13))[name = tensor<string, []>("aw_1227_cast_fp16")];
+            tensor<string, []> aw_1229_equation_0 = const()[name = tensor<string, []>("aw_1229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1229_cast_fp16 = einsum(equation = aw_1229_equation_0, values = (var_8268_cast_fp16_14, var_8246_cast_fp16_14))[name = tensor<string, []>("aw_1229_cast_fp16")];
+            tensor<string, []> aw_1231_equation_0 = const()[name = tensor<string, []>("aw_1231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1231_cast_fp16 = einsum(equation = aw_1231_equation_0, values = (var_8268_cast_fp16_15, var_8246_cast_fp16_15))[name = tensor<string, []>("aw_1231_cast_fp16")];
+            tensor<string, []> aw_1233_equation_0 = const()[name = tensor<string, []>("aw_1233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1233_cast_fp16 = einsum(equation = aw_1233_equation_0, values = (var_8268_cast_fp16_16, var_8246_cast_fp16_16))[name = tensor<string, []>("aw_1233_cast_fp16")];
+            tensor<string, []> aw_1235_equation_0 = const()[name = tensor<string, []>("aw_1235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1235_cast_fp16 = einsum(equation = aw_1235_equation_0, values = (var_8268_cast_fp16_17, var_8246_cast_fp16_17))[name = tensor<string, []>("aw_1235_cast_fp16")];
+            tensor<string, []> aw_1237_equation_0 = const()[name = tensor<string, []>("aw_1237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1237_cast_fp16 = einsum(equation = aw_1237_equation_0, values = (var_8268_cast_fp16_18, var_8246_cast_fp16_18))[name = tensor<string, []>("aw_1237_cast_fp16")];
+            tensor<string, []> aw_1239_equation_0 = const()[name = tensor<string, []>("aw_1239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1239_cast_fp16 = einsum(equation = aw_1239_equation_0, values = (var_8268_cast_fp16_19, var_8246_cast_fp16_19))[name = tensor<string, []>("aw_1239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8350_cast_fp16 = softmax(axis = var_8194, x = aw_1201_cast_fp16)[name = tensor<string, []>("op_8350_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8351_cast_fp16 = softmax(axis = var_8194, x = aw_1203_cast_fp16)[name = tensor<string, []>("op_8351_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8352_cast_fp16 = softmax(axis = var_8194, x = aw_1205_cast_fp16)[name = tensor<string, []>("op_8352_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8353_cast_fp16 = softmax(axis = var_8194, x = aw_1207_cast_fp16)[name = tensor<string, []>("op_8353_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8354_cast_fp16 = softmax(axis = var_8194, x = aw_1209_cast_fp16)[name = tensor<string, []>("op_8354_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8355_cast_fp16 = softmax(axis = var_8194, x = aw_1211_cast_fp16)[name = tensor<string, []>("op_8355_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8356_cast_fp16 = softmax(axis = var_8194, x = aw_1213_cast_fp16)[name = tensor<string, []>("op_8356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8357_cast_fp16 = softmax(axis = var_8194, x = aw_1215_cast_fp16)[name = tensor<string, []>("op_8357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8358_cast_fp16 = softmax(axis = var_8194, x = aw_1217_cast_fp16)[name = tensor<string, []>("op_8358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8359_cast_fp16 = softmax(axis = var_8194, x = aw_1219_cast_fp16)[name = tensor<string, []>("op_8359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8360_cast_fp16 = softmax(axis = var_8194, x = aw_1221_cast_fp16)[name = tensor<string, []>("op_8360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8361_cast_fp16 = softmax(axis = var_8194, x = aw_1223_cast_fp16)[name = tensor<string, []>("op_8361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8362_cast_fp16 = softmax(axis = var_8194, x = aw_1225_cast_fp16)[name = tensor<string, []>("op_8362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8363_cast_fp16 = softmax(axis = var_8194, x = aw_1227_cast_fp16)[name = tensor<string, []>("op_8363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8364_cast_fp16 = softmax(axis = var_8194, x = aw_1229_cast_fp16)[name = tensor<string, []>("op_8364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8365_cast_fp16 = softmax(axis = var_8194, x = aw_1231_cast_fp16)[name = tensor<string, []>("op_8365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8366_cast_fp16 = softmax(axis = var_8194, x = aw_1233_cast_fp16)[name = tensor<string, []>("op_8366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8367_cast_fp16 = softmax(axis = var_8194, x = aw_1235_cast_fp16)[name = tensor<string, []>("op_8367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8368_cast_fp16 = softmax(axis = var_8194, x = aw_1237_cast_fp16)[name = tensor<string, []>("op_8368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8369_cast_fp16 = softmax(axis = var_8194, x = aw_1239_cast_fp16)[name = tensor<string, []>("op_8369_cast_fp16")];
+            tensor<string, []> var_8371_equation_0 = const()[name = tensor<string, []>("op_8371_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8371_cast_fp16 = einsum(equation = var_8371_equation_0, values = (var_8289_cast_fp16_0, var_8350_cast_fp16))[name = tensor<string, []>("op_8371_cast_fp16")];
+            tensor<string, []> var_8373_equation_0 = const()[name = tensor<string, []>("op_8373_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8373_cast_fp16 = einsum(equation = var_8373_equation_0, values = (var_8289_cast_fp16_1, var_8351_cast_fp16))[name = tensor<string, []>("op_8373_cast_fp16")];
+            tensor<string, []> var_8375_equation_0 = const()[name = tensor<string, []>("op_8375_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8375_cast_fp16 = einsum(equation = var_8375_equation_0, values = (var_8289_cast_fp16_2, var_8352_cast_fp16))[name = tensor<string, []>("op_8375_cast_fp16")];
+            tensor<string, []> var_8377_equation_0 = const()[name = tensor<string, []>("op_8377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8377_cast_fp16 = einsum(equation = var_8377_equation_0, values = (var_8289_cast_fp16_3, var_8353_cast_fp16))[name = tensor<string, []>("op_8377_cast_fp16")];
+            tensor<string, []> var_8379_equation_0 = const()[name = tensor<string, []>("op_8379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8379_cast_fp16 = einsum(equation = var_8379_equation_0, values = (var_8289_cast_fp16_4, var_8354_cast_fp16))[name = tensor<string, []>("op_8379_cast_fp16")];
+            tensor<string, []> var_8381_equation_0 = const()[name = tensor<string, []>("op_8381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8381_cast_fp16 = einsum(equation = var_8381_equation_0, values = (var_8289_cast_fp16_5, var_8355_cast_fp16))[name = tensor<string, []>("op_8381_cast_fp16")];
+            tensor<string, []> var_8383_equation_0 = const()[name = tensor<string, []>("op_8383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8383_cast_fp16 = einsum(equation = var_8383_equation_0, values = (var_8289_cast_fp16_6, var_8356_cast_fp16))[name = tensor<string, []>("op_8383_cast_fp16")];
+            tensor<string, []> var_8385_equation_0 = const()[name = tensor<string, []>("op_8385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8385_cast_fp16 = einsum(equation = var_8385_equation_0, values = (var_8289_cast_fp16_7, var_8357_cast_fp16))[name = tensor<string, []>("op_8385_cast_fp16")];
+            tensor<string, []> var_8387_equation_0 = const()[name = tensor<string, []>("op_8387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8387_cast_fp16 = einsum(equation = var_8387_equation_0, values = (var_8289_cast_fp16_8, var_8358_cast_fp16))[name = tensor<string, []>("op_8387_cast_fp16")];
+            tensor<string, []> var_8389_equation_0 = const()[name = tensor<string, []>("op_8389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8389_cast_fp16 = einsum(equation = var_8389_equation_0, values = (var_8289_cast_fp16_9, var_8359_cast_fp16))[name = tensor<string, []>("op_8389_cast_fp16")];
+            tensor<string, []> var_8391_equation_0 = const()[name = tensor<string, []>("op_8391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8391_cast_fp16 = einsum(equation = var_8391_equation_0, values = (var_8289_cast_fp16_10, var_8360_cast_fp16))[name = tensor<string, []>("op_8391_cast_fp16")];
+            tensor<string, []> var_8393_equation_0 = const()[name = tensor<string, []>("op_8393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8393_cast_fp16 = einsum(equation = var_8393_equation_0, values = (var_8289_cast_fp16_11, var_8361_cast_fp16))[name = tensor<string, []>("op_8393_cast_fp16")];
+            tensor<string, []> var_8395_equation_0 = const()[name = tensor<string, []>("op_8395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8395_cast_fp16 = einsum(equation = var_8395_equation_0, values = (var_8289_cast_fp16_12, var_8362_cast_fp16))[name = tensor<string, []>("op_8395_cast_fp16")];
+            tensor<string, []> var_8397_equation_0 = const()[name = tensor<string, []>("op_8397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8397_cast_fp16 = einsum(equation = var_8397_equation_0, values = (var_8289_cast_fp16_13, var_8363_cast_fp16))[name = tensor<string, []>("op_8397_cast_fp16")];
+            tensor<string, []> var_8399_equation_0 = const()[name = tensor<string, []>("op_8399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8399_cast_fp16 = einsum(equation = var_8399_equation_0, values = (var_8289_cast_fp16_14, var_8364_cast_fp16))[name = tensor<string, []>("op_8399_cast_fp16")];
+            tensor<string, []> var_8401_equation_0 = const()[name = tensor<string, []>("op_8401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8401_cast_fp16 = einsum(equation = var_8401_equation_0, values = (var_8289_cast_fp16_15, var_8365_cast_fp16))[name = tensor<string, []>("op_8401_cast_fp16")];
+            tensor<string, []> var_8403_equation_0 = const()[name = tensor<string, []>("op_8403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8403_cast_fp16 = einsum(equation = var_8403_equation_0, values = (var_8289_cast_fp16_16, var_8366_cast_fp16))[name = tensor<string, []>("op_8403_cast_fp16")];
+            tensor<string, []> var_8405_equation_0 = const()[name = tensor<string, []>("op_8405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8405_cast_fp16 = einsum(equation = var_8405_equation_0, values = (var_8289_cast_fp16_17, var_8367_cast_fp16))[name = tensor<string, []>("op_8405_cast_fp16")];
+            tensor<string, []> var_8407_equation_0 = const()[name = tensor<string, []>("op_8407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8407_cast_fp16 = einsum(equation = var_8407_equation_0, values = (var_8289_cast_fp16_18, var_8368_cast_fp16))[name = tensor<string, []>("op_8407_cast_fp16")];
+            tensor<string, []> var_8409_equation_0 = const()[name = tensor<string, []>("op_8409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8409_cast_fp16 = einsum(equation = var_8409_equation_0, values = (var_8289_cast_fp16_19, var_8369_cast_fp16))[name = tensor<string, []>("op_8409_cast_fp16")];
+            tensor<bool, []> input_305_interleave_0 = const()[name = tensor<string, []>("input_305_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_305_cast_fp16 = concat(axis = var_8194, interleave = input_305_interleave_0, values = (var_8371_cast_fp16, var_8373_cast_fp16, var_8375_cast_fp16, var_8377_cast_fp16, var_8379_cast_fp16, var_8381_cast_fp16, var_8383_cast_fp16, var_8385_cast_fp16, var_8387_cast_fp16, var_8389_cast_fp16, var_8391_cast_fp16, var_8393_cast_fp16, var_8395_cast_fp16, var_8397_cast_fp16, var_8399_cast_fp16, var_8401_cast_fp16, var_8403_cast_fp16, var_8405_cast_fp16, var_8407_cast_fp16, var_8409_cast_fp16))[name = tensor<string, []>("input_305_cast_fp16")];
+            tensor<string, []> var_8418_pad_type_0 = const()[name = tensor<string, []>("op_8418_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8418_strides_0 = const()[name = tensor<string, []>("op_8418_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8418_pad_0 = const()[name = tensor<string, []>("op_8418_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8418_dilations_0 = const()[name = tensor<string, []>("op_8418_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8418_groups_0 = const()[name = tensor<string, []>("op_8418_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1204729792)))];
+            tensor<fp16, [1280]> blocks_30_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208006656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8418_cast_fp16 = conv(bias = blocks_30_attn_out_bias_to_fp16, dilations = var_8418_dilations_0, groups = var_8418_groups_0, pad = var_8418_pad_0, pad_type = var_8418_pad_type_0, strides = var_8418_strides_0, weight = blocks_30_attn_out_weight_to_fp16, x = input_305_cast_fp16)[name = tensor<string, []>("op_8418_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = var_8418_cast_fp16)[name = tensor<string, []>("inputs_123_cast_fp16")];
+            tensor<int32, [1]> input_307_axes_0 = const()[name = tensor<string, []>("input_307_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_307_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_307_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208009280)))];
+            tensor<fp16, [1280]> input_307_beta_0_to_fp16 = const()[name = tensor<string, []>("input_307_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208011904)))];
+            tensor<fp16, []> var_8428_to_fp16 = const()[name = tensor<string, []>("op_8428_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_307_cast_fp16 = layer_norm(axes = input_307_axes_0, beta = input_307_beta_0_to_fp16, epsilon = var_8428_to_fp16, gamma = input_307_gamma_0_to_fp16, x = inputs_123_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
+            tensor<string, []> input_309_pad_type_0 = const()[name = tensor<string, []>("input_309_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_309_strides_0 = const()[name = tensor<string, []>("input_309_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_309_pad_0 = const()[name = tensor<string, []>("input_309_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_309_dilations_0 = const()[name = tensor<string, []>("input_309_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_309_groups_0 = const()[name = tensor<string, []>("input_309_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_30_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208014528)))];
+            tensor<fp16, [5120]> blocks_30_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221121792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_309_cast_fp16 = conv(bias = blocks_30_mlp_0_bias_to_fp16, dilations = input_309_dilations_0, groups = input_309_groups_0, pad = input_309_pad_0, pad_type = input_309_pad_type_0, strides = input_309_strides_0, weight = blocks_30_mlp_0_weight_to_fp16, x = input_307_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
+            tensor<string, []> input_311_mode_0 = const()[name = tensor<string, []>("input_311_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_311_cast_fp16 = gelu(mode = input_311_mode_0, x = input_309_cast_fp16)[name = tensor<string, []>("input_311_cast_fp16")];
+            tensor<string, []> var_8454_pad_type_0 = const()[name = tensor<string, []>("op_8454_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8454_strides_0 = const()[name = tensor<string, []>("op_8454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8454_pad_0 = const()[name = tensor<string, []>("op_8454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8454_dilations_0 = const()[name = tensor<string, []>("op_8454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8454_groups_0 = const()[name = tensor<string, []>("op_8454_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_30_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221132096)))];
+            tensor<fp16, [1280]> blocks_30_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234239360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8454_cast_fp16 = conv(bias = blocks_30_mlp_2_bias_to_fp16, dilations = var_8454_dilations_0, groups = var_8454_groups_0, pad = var_8454_pad_0, pad_type = var_8454_pad_type_0, strides = var_8454_strides_0, weight = blocks_30_mlp_2_weight_to_fp16, x = input_311_cast_fp16)[name = tensor<string, []>("op_8454_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = var_8454_cast_fp16)[name = tensor<string, []>("inputs_125_cast_fp16")];
+            tensor<int32, []> var_8463 = const()[name = tensor<string, []>("op_8463"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_313_axes_0 = const()[name = tensor<string, []>("input_313_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_313_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_313_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234241984)))];
+            tensor<fp16, [1280]> input_313_beta_0_to_fp16 = const()[name = tensor<string, []>("input_313_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234244608)))];
+            tensor<fp16, []> var_8479_to_fp16 = const()[name = tensor<string, []>("op_8479_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_313_cast_fp16 = layer_norm(axes = input_313_axes_0, beta = input_313_beta_0_to_fp16, epsilon = var_8479_to_fp16, gamma = input_313_gamma_0_to_fp16, x = inputs_125_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8514_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8514_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234247232)))];
+            tensor<fp16, [1280]> var_8514_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8514_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237524096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8514_cast_fp16 = conv(bias = var_8514_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_8514_weight_0_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8514_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237526720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_31_attn_key_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_8512_pad_type_0 = const()[name = tensor<string, []>("op_8512_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8512_strides_0 = const()[name = tensor<string, []>("op_8512_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8512_pad_0 = const()[name = tensor<string, []>("op_8512_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8512_dilations_0 = const()[name = tensor<string, []>("op_8512_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8512_groups_0 = const()[name = tensor<string, []>("op_8512_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1240803584)))];
+            tensor<fp16, [1280]> blocks_31_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244080448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8512_cast_fp16 = conv(bias = blocks_31_attn_value_bias_to_fp16, dilations = var_8512_dilations_0, groups = var_8512_groups_0, pad = var_8512_pad_0, pad_type = var_8512_pad_type_0, strides = var_8512_strides_0, weight = blocks_31_attn_value_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8512_cast_fp16")];
+            tensor<int32, [20]> tile_93 = const()[name = tensor<string, []>("tile_93"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8515_axis_0 = const()[name = tensor<string, []>("op_8515_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_19 = split(axis = var_8515_axis_0, split_sizes = tile_93, x = var_8514_cast_fp16)[name = tensor<string, []>("op_8515_cast_fp16")];
+            tensor<int32, [4]> var_8536_perm_0 = const()[name = tensor<string, []>("op_8536_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_94 = const()[name = tensor<string, []>("tile_94"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8537_axis_0 = const()[name = tensor<string, []>("op_8537_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8536_cast_fp16 = transpose(perm = var_8536_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_19 = split(axis = var_8537_axis_0, split_sizes = tile_94, x = var_8536_cast_fp16)[name = tensor<string, []>("op_8537_cast_fp16")];
+            tensor<int32, [20]> tile_95 = const()[name = tensor<string, []>("tile_95"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8558_axis_0 = const()[name = tensor<string, []>("op_8558_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_19 = split(axis = var_8558_axis_0, split_sizes = tile_95, x = var_8512_cast_fp16)[name = tensor<string, []>("op_8558_cast_fp16")];
+            tensor<string, []> aw_1241_equation_0 = const()[name = tensor<string, []>("aw_1241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1241_cast_fp16 = einsum(equation = aw_1241_equation_0, values = (var_8537_cast_fp16_0, var_8515_cast_fp16_0))[name = tensor<string, []>("aw_1241_cast_fp16")];
+            tensor<string, []> aw_1243_equation_0 = const()[name = tensor<string, []>("aw_1243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1243_cast_fp16 = einsum(equation = aw_1243_equation_0, values = (var_8537_cast_fp16_1, var_8515_cast_fp16_1))[name = tensor<string, []>("aw_1243_cast_fp16")];
+            tensor<string, []> aw_1245_equation_0 = const()[name = tensor<string, []>("aw_1245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1245_cast_fp16 = einsum(equation = aw_1245_equation_0, values = (var_8537_cast_fp16_2, var_8515_cast_fp16_2))[name = tensor<string, []>("aw_1245_cast_fp16")];
+            tensor<string, []> aw_1247_equation_0 = const()[name = tensor<string, []>("aw_1247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1247_cast_fp16 = einsum(equation = aw_1247_equation_0, values = (var_8537_cast_fp16_3, var_8515_cast_fp16_3))[name = tensor<string, []>("aw_1247_cast_fp16")];
+            tensor<string, []> aw_1249_equation_0 = const()[name = tensor<string, []>("aw_1249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1249_cast_fp16 = einsum(equation = aw_1249_equation_0, values = (var_8537_cast_fp16_4, var_8515_cast_fp16_4))[name = tensor<string, []>("aw_1249_cast_fp16")];
+            tensor<string, []> aw_1251_equation_0 = const()[name = tensor<string, []>("aw_1251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1251_cast_fp16 = einsum(equation = aw_1251_equation_0, values = (var_8537_cast_fp16_5, var_8515_cast_fp16_5))[name = tensor<string, []>("aw_1251_cast_fp16")];
+            tensor<string, []> aw_1253_equation_0 = const()[name = tensor<string, []>("aw_1253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1253_cast_fp16 = einsum(equation = aw_1253_equation_0, values = (var_8537_cast_fp16_6, var_8515_cast_fp16_6))[name = tensor<string, []>("aw_1253_cast_fp16")];
+            tensor<string, []> aw_1255_equation_0 = const()[name = tensor<string, []>("aw_1255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1255_cast_fp16 = einsum(equation = aw_1255_equation_0, values = (var_8537_cast_fp16_7, var_8515_cast_fp16_7))[name = tensor<string, []>("aw_1255_cast_fp16")];
+            tensor<string, []> aw_1257_equation_0 = const()[name = tensor<string, []>("aw_1257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1257_cast_fp16 = einsum(equation = aw_1257_equation_0, values = (var_8537_cast_fp16_8, var_8515_cast_fp16_8))[name = tensor<string, []>("aw_1257_cast_fp16")];
+            tensor<string, []> aw_1259_equation_0 = const()[name = tensor<string, []>("aw_1259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1259_cast_fp16 = einsum(equation = aw_1259_equation_0, values = (var_8537_cast_fp16_9, var_8515_cast_fp16_9))[name = tensor<string, []>("aw_1259_cast_fp16")];
+            tensor<string, []> aw_1261_equation_0 = const()[name = tensor<string, []>("aw_1261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1261_cast_fp16 = einsum(equation = aw_1261_equation_0, values = (var_8537_cast_fp16_10, var_8515_cast_fp16_10))[name = tensor<string, []>("aw_1261_cast_fp16")];
+            tensor<string, []> aw_1263_equation_0 = const()[name = tensor<string, []>("aw_1263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1263_cast_fp16 = einsum(equation = aw_1263_equation_0, values = (var_8537_cast_fp16_11, var_8515_cast_fp16_11))[name = tensor<string, []>("aw_1263_cast_fp16")];
+            tensor<string, []> aw_1265_equation_0 = const()[name = tensor<string, []>("aw_1265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1265_cast_fp16 = einsum(equation = aw_1265_equation_0, values = (var_8537_cast_fp16_12, var_8515_cast_fp16_12))[name = tensor<string, []>("aw_1265_cast_fp16")];
+            tensor<string, []> aw_1267_equation_0 = const()[name = tensor<string, []>("aw_1267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1267_cast_fp16 = einsum(equation = aw_1267_equation_0, values = (var_8537_cast_fp16_13, var_8515_cast_fp16_13))[name = tensor<string, []>("aw_1267_cast_fp16")];
+            tensor<string, []> aw_1269_equation_0 = const()[name = tensor<string, []>("aw_1269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1269_cast_fp16 = einsum(equation = aw_1269_equation_0, values = (var_8537_cast_fp16_14, var_8515_cast_fp16_14))[name = tensor<string, []>("aw_1269_cast_fp16")];
+            tensor<string, []> aw_1271_equation_0 = const()[name = tensor<string, []>("aw_1271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1271_cast_fp16 = einsum(equation = aw_1271_equation_0, values = (var_8537_cast_fp16_15, var_8515_cast_fp16_15))[name = tensor<string, []>("aw_1271_cast_fp16")];
+            tensor<string, []> aw_1273_equation_0 = const()[name = tensor<string, []>("aw_1273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1273_cast_fp16 = einsum(equation = aw_1273_equation_0, values = (var_8537_cast_fp16_16, var_8515_cast_fp16_16))[name = tensor<string, []>("aw_1273_cast_fp16")];
+            tensor<string, []> aw_1275_equation_0 = const()[name = tensor<string, []>("aw_1275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1275_cast_fp16 = einsum(equation = aw_1275_equation_0, values = (var_8537_cast_fp16_17, var_8515_cast_fp16_17))[name = tensor<string, []>("aw_1275_cast_fp16")];
+            tensor<string, []> aw_1277_equation_0 = const()[name = tensor<string, []>("aw_1277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1277_cast_fp16 = einsum(equation = aw_1277_equation_0, values = (var_8537_cast_fp16_18, var_8515_cast_fp16_18))[name = tensor<string, []>("aw_1277_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_8537_cast_fp16_19, var_8515_cast_fp16_19))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8619_cast_fp16 = softmax(axis = var_8463, x = aw_1241_cast_fp16)[name = tensor<string, []>("op_8619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8620_cast_fp16 = softmax(axis = var_8463, x = aw_1243_cast_fp16)[name = tensor<string, []>("op_8620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8621_cast_fp16 = softmax(axis = var_8463, x = aw_1245_cast_fp16)[name = tensor<string, []>("op_8621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8622_cast_fp16 = softmax(axis = var_8463, x = aw_1247_cast_fp16)[name = tensor<string, []>("op_8622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8623_cast_fp16 = softmax(axis = var_8463, x = aw_1249_cast_fp16)[name = tensor<string, []>("op_8623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8624_cast_fp16 = softmax(axis = var_8463, x = aw_1251_cast_fp16)[name = tensor<string, []>("op_8624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8625_cast_fp16 = softmax(axis = var_8463, x = aw_1253_cast_fp16)[name = tensor<string, []>("op_8625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8626_cast_fp16 = softmax(axis = var_8463, x = aw_1255_cast_fp16)[name = tensor<string, []>("op_8626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8627_cast_fp16 = softmax(axis = var_8463, x = aw_1257_cast_fp16)[name = tensor<string, []>("op_8627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8628_cast_fp16 = softmax(axis = var_8463, x = aw_1259_cast_fp16)[name = tensor<string, []>("op_8628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8629_cast_fp16 = softmax(axis = var_8463, x = aw_1261_cast_fp16)[name = tensor<string, []>("op_8629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8630_cast_fp16 = softmax(axis = var_8463, x = aw_1263_cast_fp16)[name = tensor<string, []>("op_8630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8631_cast_fp16 = softmax(axis = var_8463, x = aw_1265_cast_fp16)[name = tensor<string, []>("op_8631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8632_cast_fp16 = softmax(axis = var_8463, x = aw_1267_cast_fp16)[name = tensor<string, []>("op_8632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8633_cast_fp16 = softmax(axis = var_8463, x = aw_1269_cast_fp16)[name = tensor<string, []>("op_8633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8634_cast_fp16 = softmax(axis = var_8463, x = aw_1271_cast_fp16)[name = tensor<string, []>("op_8634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8635_cast_fp16 = softmax(axis = var_8463, x = aw_1273_cast_fp16)[name = tensor<string, []>("op_8635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8636_cast_fp16 = softmax(axis = var_8463, x = aw_1275_cast_fp16)[name = tensor<string, []>("op_8636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8637_cast_fp16 = softmax(axis = var_8463, x = aw_1277_cast_fp16)[name = tensor<string, []>("op_8637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8638_cast_fp16 = softmax(axis = var_8463, x = aw_cast_fp16)[name = tensor<string, []>("op_8638_cast_fp16")];
+            tensor<string, []> var_8640_equation_0 = const()[name = tensor<string, []>("op_8640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8640_cast_fp16 = einsum(equation = var_8640_equation_0, values = (var_8558_cast_fp16_0, var_8619_cast_fp16))[name = tensor<string, []>("op_8640_cast_fp16")];
+            tensor<string, []> var_8642_equation_0 = const()[name = tensor<string, []>("op_8642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8642_cast_fp16 = einsum(equation = var_8642_equation_0, values = (var_8558_cast_fp16_1, var_8620_cast_fp16))[name = tensor<string, []>("op_8642_cast_fp16")];
+            tensor<string, []> var_8644_equation_0 = const()[name = tensor<string, []>("op_8644_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8644_cast_fp16 = einsum(equation = var_8644_equation_0, values = (var_8558_cast_fp16_2, var_8621_cast_fp16))[name = tensor<string, []>("op_8644_cast_fp16")];
+            tensor<string, []> var_8646_equation_0 = const()[name = tensor<string, []>("op_8646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8646_cast_fp16 = einsum(equation = var_8646_equation_0, values = (var_8558_cast_fp16_3, var_8622_cast_fp16))[name = tensor<string, []>("op_8646_cast_fp16")];
+            tensor<string, []> var_8648_equation_0 = const()[name = tensor<string, []>("op_8648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8648_cast_fp16 = einsum(equation = var_8648_equation_0, values = (var_8558_cast_fp16_4, var_8623_cast_fp16))[name = tensor<string, []>("op_8648_cast_fp16")];
+            tensor<string, []> var_8650_equation_0 = const()[name = tensor<string, []>("op_8650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8650_cast_fp16 = einsum(equation = var_8650_equation_0, values = (var_8558_cast_fp16_5, var_8624_cast_fp16))[name = tensor<string, []>("op_8650_cast_fp16")];
+            tensor<string, []> var_8652_equation_0 = const()[name = tensor<string, []>("op_8652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8652_cast_fp16 = einsum(equation = var_8652_equation_0, values = (var_8558_cast_fp16_6, var_8625_cast_fp16))[name = tensor<string, []>("op_8652_cast_fp16")];
+            tensor<string, []> var_8654_equation_0 = const()[name = tensor<string, []>("op_8654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8654_cast_fp16 = einsum(equation = var_8654_equation_0, values = (var_8558_cast_fp16_7, var_8626_cast_fp16))[name = tensor<string, []>("op_8654_cast_fp16")];
+            tensor<string, []> var_8656_equation_0 = const()[name = tensor<string, []>("op_8656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8656_cast_fp16 = einsum(equation = var_8656_equation_0, values = (var_8558_cast_fp16_8, var_8627_cast_fp16))[name = tensor<string, []>("op_8656_cast_fp16")];
+            tensor<string, []> var_8658_equation_0 = const()[name = tensor<string, []>("op_8658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8658_cast_fp16 = einsum(equation = var_8658_equation_0, values = (var_8558_cast_fp16_9, var_8628_cast_fp16))[name = tensor<string, []>("op_8658_cast_fp16")];
+            tensor<string, []> var_8660_equation_0 = const()[name = tensor<string, []>("op_8660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8660_cast_fp16 = einsum(equation = var_8660_equation_0, values = (var_8558_cast_fp16_10, var_8629_cast_fp16))[name = tensor<string, []>("op_8660_cast_fp16")];
+            tensor<string, []> var_8662_equation_0 = const()[name = tensor<string, []>("op_8662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8662_cast_fp16 = einsum(equation = var_8662_equation_0, values = (var_8558_cast_fp16_11, var_8630_cast_fp16))[name = tensor<string, []>("op_8662_cast_fp16")];
+            tensor<string, []> var_8664_equation_0 = const()[name = tensor<string, []>("op_8664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8664_cast_fp16 = einsum(equation = var_8664_equation_0, values = (var_8558_cast_fp16_12, var_8631_cast_fp16))[name = tensor<string, []>("op_8664_cast_fp16")];
+            tensor<string, []> var_8666_equation_0 = const()[name = tensor<string, []>("op_8666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8666_cast_fp16 = einsum(equation = var_8666_equation_0, values = (var_8558_cast_fp16_13, var_8632_cast_fp16))[name = tensor<string, []>("op_8666_cast_fp16")];
+            tensor<string, []> var_8668_equation_0 = const()[name = tensor<string, []>("op_8668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8668_cast_fp16 = einsum(equation = var_8668_equation_0, values = (var_8558_cast_fp16_14, var_8633_cast_fp16))[name = tensor<string, []>("op_8668_cast_fp16")];
+            tensor<string, []> var_8670_equation_0 = const()[name = tensor<string, []>("op_8670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8670_cast_fp16 = einsum(equation = var_8670_equation_0, values = (var_8558_cast_fp16_15, var_8634_cast_fp16))[name = tensor<string, []>("op_8670_cast_fp16")];
+            tensor<string, []> var_8672_equation_0 = const()[name = tensor<string, []>("op_8672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8672_cast_fp16 = einsum(equation = var_8672_equation_0, values = (var_8558_cast_fp16_16, var_8635_cast_fp16))[name = tensor<string, []>("op_8672_cast_fp16")];
+            tensor<string, []> var_8674_equation_0 = const()[name = tensor<string, []>("op_8674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8674_cast_fp16 = einsum(equation = var_8674_equation_0, values = (var_8558_cast_fp16_17, var_8636_cast_fp16))[name = tensor<string, []>("op_8674_cast_fp16")];
+            tensor<string, []> var_8676_equation_0 = const()[name = tensor<string, []>("op_8676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8676_cast_fp16 = einsum(equation = var_8676_equation_0, values = (var_8558_cast_fp16_18, var_8637_cast_fp16))[name = tensor<string, []>("op_8676_cast_fp16")];
+            tensor<string, []> var_8678_equation_0 = const()[name = tensor<string, []>("op_8678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8678_cast_fp16 = einsum(equation = var_8678_equation_0, values = (var_8558_cast_fp16_19, var_8638_cast_fp16))[name = tensor<string, []>("op_8678_cast_fp16")];
+            tensor<bool, []> input_315_interleave_0 = const()[name = tensor<string, []>("input_315_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_315_cast_fp16 = concat(axis = var_8463, interleave = input_315_interleave_0, values = (var_8640_cast_fp16, var_8642_cast_fp16, var_8644_cast_fp16, var_8646_cast_fp16, var_8648_cast_fp16, var_8650_cast_fp16, var_8652_cast_fp16, var_8654_cast_fp16, var_8656_cast_fp16, var_8658_cast_fp16, var_8660_cast_fp16, var_8662_cast_fp16, var_8664_cast_fp16, var_8666_cast_fp16, var_8668_cast_fp16, var_8670_cast_fp16, var_8672_cast_fp16, var_8674_cast_fp16, var_8676_cast_fp16, var_8678_cast_fp16))[name = tensor<string, []>("input_315_cast_fp16")];
+            tensor<string, []> var_8687_pad_type_0 = const()[name = tensor<string, []>("op_8687_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8687_strides_0 = const()[name = tensor<string, []>("op_8687_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8687_pad_0 = const()[name = tensor<string, []>("op_8687_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8687_dilations_0 = const()[name = tensor<string, []>("op_8687_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8687_groups_0 = const()[name = tensor<string, []>("op_8687_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244083072)))];
+            tensor<fp16, [1280]> blocks_31_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247359936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8687_cast_fp16 = conv(bias = blocks_31_attn_out_bias_to_fp16, dilations = var_8687_dilations_0, groups = var_8687_groups_0, pad = var_8687_pad_0, pad_type = var_8687_pad_type_0, strides = var_8687_strides_0, weight = blocks_31_attn_out_weight_to_fp16, x = input_315_cast_fp16)[name = tensor<string, []>("op_8687_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = var_8687_cast_fp16)[name = tensor<string, []>("inputs_127_cast_fp16")];
+            tensor<int32, [1]> input_317_axes_0 = const()[name = tensor<string, []>("input_317_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_317_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_317_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247362560)))];
+            tensor<fp16, [1280]> input_317_beta_0_to_fp16 = const()[name = tensor<string, []>("input_317_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247365184)))];
+            tensor<fp16, []> var_8697_to_fp16 = const()[name = tensor<string, []>("op_8697_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_317_cast_fp16 = layer_norm(axes = input_317_axes_0, beta = input_317_beta_0_to_fp16, epsilon = var_8697_to_fp16, gamma = input_317_gamma_0_to_fp16, x = inputs_127_cast_fp16)[name = tensor<string, []>("input_317_cast_fp16")];
+            tensor<string, []> input_319_pad_type_0 = const()[name = tensor<string, []>("input_319_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_319_strides_0 = const()[name = tensor<string, []>("input_319_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_319_pad_0 = const()[name = tensor<string, []>("input_319_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_319_dilations_0 = const()[name = tensor<string, []>("input_319_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_319_groups_0 = const()[name = tensor<string, []>("input_319_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_31_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247367808)))];
+            tensor<fp16, [5120]> blocks_31_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260475072)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_319_cast_fp16 = conv(bias = blocks_31_mlp_0_bias_to_fp16, dilations = input_319_dilations_0, groups = input_319_groups_0, pad = input_319_pad_0, pad_type = input_319_pad_type_0, strides = input_319_strides_0, weight = blocks_31_mlp_0_weight_to_fp16, x = input_317_cast_fp16)[name = tensor<string, []>("input_319_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_319_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_8723_pad_type_0 = const()[name = tensor<string, []>("op_8723_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8723_strides_0 = const()[name = tensor<string, []>("op_8723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8723_pad_0 = const()[name = tensor<string, []>("op_8723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8723_dilations_0 = const()[name = tensor<string, []>("op_8723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8723_groups_0 = const()[name = tensor<string, []>("op_8723_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_31_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260485376)))];
+            tensor<fp16, [1280]> blocks_31_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273592640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8723_cast_fp16 = conv(bias = blocks_31_mlp_2_bias_to_fp16, dilations = var_8723_dilations_0, groups = var_8723_groups_0, pad = var_8723_pad_0, pad_type = var_8723_pad_type_0, strides = var_8723_strides_0, weight = blocks_31_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_8723_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = var_8723_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273595264)))];
+            tensor<fp16, [1280]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273597888)))];
+            tensor<fp16, []> var_8737_to_fp16 = const()[name = tensor<string, []>("op_8737_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_8737_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_8748_axes_0 = const()[name = tensor<string, []>("op_8748_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1500]> var_8748_cast_fp16 = squeeze(axes = var_8748_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_8748_cast_fp16")];
+            tensor<int32, [3]> var_8751_perm_0 = const()[name = tensor<string, []>("op_8751_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_8751_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_8751_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 1280]> var_8751_cast_fp16 = transpose(perm = var_8751_perm_0, x = var_8748_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 1280]> output = cast(dtype = var_8751_cast_fp16_to_fp32_dtype_0, x = var_8751_cast_fp16)[name = tensor<string, []>("cast_131")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/large-v2/ggml-large-v2-encoder.mlmodelc/weights/weight.bin b/large-v2/ggml-large-v2-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c39236e469b018c8f5efe3a0bbf23f40a7b4d17f
--- /dev/null
+++ b/large-v2/ggml-large-v2-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0425ba64707bb4c9727963b5e2b0ee1e8ddece596a56c7795aca0d966614b2
+size 1273600512
diff --git a/large-v2/ggml-large-v2.bin b/large-v2/ggml-large-v2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..649aafd67e30021d0140c24342ee2ffb947f4bde
--- /dev/null
+++ b/large-v2/ggml-large-v2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a423fe4d40c82774b6af34115b8b935f34152246eb19e80e376071d3f999487
+size 3094623691
diff --git a/large-v3/.DS_Store b/large-v3/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..51b786d7ea7c568805d5c86a53eea79d96e02399
Binary files /dev/null and b/large-v3/.DS_Store differ
diff --git a/large-v3/ggml-large-v3-encoder.mlmodelc/analytics/coremldata.bin b/large-v3/ggml-large-v3-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fdbd526d7fb5a2e38e143d34c21be58925d0e888
--- /dev/null
+++ b/large-v3/ggml-large-v3-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7cfed2b3afbbf8fa7445b0ec0424e702b83c673568ddfdaa74417d8110c109c
+size 243
diff --git a/large-v3/ggml-large-v3-encoder.mlmodelc/coremldata.bin b/large-v3/ggml-large-v3-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c584a477784aac75562de2c7d88f8c7b741e78d9
--- /dev/null
+++ b/large-v3/ggml-large-v3-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f9fdf4bc4270b79aa10e04c95098f9c8f171b63ca54753fbdfd1724582324db
+size 321
diff --git a/large-v3/ggml-large-v3-encoder.mlmodelc/metadata.json b/large-v3/ggml-large-v3-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbdcd27aa4cd56870b73a4fbf017356adea727f7
--- /dev/null
+++ b/large-v3/ggml-large-v3-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1280)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1280]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 32,
+      "Gelu" : 34,
+      "LayerNorm" : 65,
+      "Transpose" : 33,
+      "Softmax" : 640,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 65,
+      "Einsum" : 1280,
+      "ExpandDims" : 1,
+      "Split" : 96,
+      "Conv" : 194
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 128 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_large_v3",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/large-v3/ggml-large-v3-encoder.mlmodelc/model.mil b/large-v3/ggml-large-v3-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..8dd576adade7bc3f1f73e1bc769ae4fbbf686045
--- /dev/null
+++ b/large-v3/ggml-large-v3-encoder.mlmodelc/model.mil
@@ -0,0 +1,5643 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 128, 3000]> logmel_data) {
+            tensor<string, []> var_84_pad_type_0 = const()[name = tensor<string, []>("op_84_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_84_pad_0 = const()[name = tensor<string, []>("op_84_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_84_strides_0 = const()[name = tensor<string, []>("op_84_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_84_dilations_0 = const()[name = tensor<string, []>("op_84_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_84_groups_0 = const()[name = tensor<string, []>("op_84_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1280, 128, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [1280, 128, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1280]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(983168)))];
+            tensor<fp16, [1, 128, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_132")];
+            tensor<fp16, [1, 1280, 3000]> var_84_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_84_dilations_0, groups = var_84_groups_0, pad = var_84_pad_0, pad_type = var_84_pad_type_0, strides = var_84_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_84_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_84_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_102_pad_type_0 = const()[name = tensor<string, []>("op_102_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_102_pad_0 = const()[name = tensor<string, []>("op_102_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_102_strides_0 = const()[name = tensor<string, []>("op_102_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_102_dilations_0 = const()[name = tensor<string, []>("op_102_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_102_groups_0 = const()[name = tensor<string, []>("op_102_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1280, 1280, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985792)))];
+            tensor<fp16, [1280]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10816256)))];
+            tensor<fp16, [1, 1280, 1500]> var_102_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_102_dilations_0, groups = var_102_groups_0, pad = var_102_pad_0, pad_type = var_102_pad_type_0, strides = var_102_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_102_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1280, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_102_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [1280, 1500]> var_107_to_fp16 = const()[name = tensor<string, []>("op_107_to_fp16"), val = tensor<fp16, [1280, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10818880)))];
+            tensor<fp16, [1, 1280, 1500]> var_109_cast_fp16 = add(x = x_3_cast_fp16, y = var_107_to_fp16)[name = tensor<string, []>("op_109_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_109_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14658944)))];
+            tensor<fp16, [1280]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14661568)))];
+            tensor<fp16, []> var_140_to_fp16 = const()[name = tensor<string, []>("op_140_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_140_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_175_weight_0_to_fp16 = const()[name = tensor<string, []>("op_175_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14664192)))];
+            tensor<fp16, [1280]> var_175_bias_0_to_fp16 = const()[name = tensor<string, []>("op_175_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17941056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_175_cast_fp16 = conv(bias = var_175_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_175_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17943680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_173_pad_type_0 = const()[name = tensor<string, []>("op_173_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_173_strides_0 = const()[name = tensor<string, []>("op_173_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_173_pad_0 = const()[name = tensor<string, []>("op_173_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_173_dilations_0 = const()[name = tensor<string, []>("op_173_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_173_groups_0 = const()[name = tensor<string, []>("op_173_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21220544)))];
+            tensor<fp16, [1280]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24497408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_173_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_173_dilations_0, groups = var_173_groups_0, pad = var_173_pad_0, pad_type = var_173_pad_type_0, strides = var_173_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_173_cast_fp16")];
+            tensor<int32, [20]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_176_axis_0 = const()[name = tensor<string, []>("op_176_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_176_cast_fp16_19 = split(axis = var_176_axis_0, split_sizes = tile_0, x = var_175_cast_fp16)[name = tensor<string, []>("op_176_cast_fp16")];
+            tensor<int32, [4]> var_197_perm_0 = const()[name = tensor<string, []>("op_197_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_198_axis_0 = const()[name = tensor<string, []>("op_198_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_197_cast_fp16 = transpose(perm = var_197_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_32")];
+            tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_198_cast_fp16_19 = split(axis = var_198_axis_0, split_sizes = tile_1, x = var_197_cast_fp16)[name = tensor<string, []>("op_198_cast_fp16")];
+            tensor<int32, [20]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_219_axis_0 = const()[name = tensor<string, []>("op_219_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16_19 = split(axis = var_219_axis_0, split_sizes = tile_2, x = var_173_cast_fp16)[name = tensor<string, []>("op_219_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_198_cast_fp16_0, var_176_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_198_cast_fp16_1, var_176_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_198_cast_fp16_2, var_176_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_198_cast_fp16_3, var_176_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_198_cast_fp16_4, var_176_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_198_cast_fp16_5, var_176_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_198_cast_fp16_6, var_176_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_198_cast_fp16_7, var_176_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_198_cast_fp16_8, var_176_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_198_cast_fp16_9, var_176_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_198_cast_fp16_10, var_176_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_198_cast_fp16_11, var_176_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_198_cast_fp16_12, var_176_cast_fp16_12))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_198_cast_fp16_13, var_176_cast_fp16_13))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_198_cast_fp16_14, var_176_cast_fp16_14))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_198_cast_fp16_15, var_176_cast_fp16_15))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_198_cast_fp16_16, var_176_cast_fp16_16))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_198_cast_fp16_17, var_176_cast_fp16_17))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_198_cast_fp16_18, var_176_cast_fp16_18))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_198_cast_fp16_19, var_176_cast_fp16_19))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_280_cast_fp16 = softmax(axis = var_124, x = aw_1_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_281_cast_fp16 = softmax(axis = var_124, x = aw_3_cast_fp16)[name = tensor<string, []>("op_281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_282_cast_fp16 = softmax(axis = var_124, x = aw_5_cast_fp16)[name = tensor<string, []>("op_282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_283_cast_fp16 = softmax(axis = var_124, x = aw_7_cast_fp16)[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_284_cast_fp16 = softmax(axis = var_124, x = aw_9_cast_fp16)[name = tensor<string, []>("op_284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_285_cast_fp16 = softmax(axis = var_124, x = aw_11_cast_fp16)[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_286_cast_fp16 = softmax(axis = var_124, x = aw_13_cast_fp16)[name = tensor<string, []>("op_286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_287_cast_fp16 = softmax(axis = var_124, x = aw_15_cast_fp16)[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_288_cast_fp16 = softmax(axis = var_124, x = aw_17_cast_fp16)[name = tensor<string, []>("op_288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_289_cast_fp16 = softmax(axis = var_124, x = aw_19_cast_fp16)[name = tensor<string, []>("op_289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_290_cast_fp16 = softmax(axis = var_124, x = aw_21_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_291_cast_fp16 = softmax(axis = var_124, x = aw_23_cast_fp16)[name = tensor<string, []>("op_291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_292_cast_fp16 = softmax(axis = var_124, x = aw_25_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_293_cast_fp16 = softmax(axis = var_124, x = aw_27_cast_fp16)[name = tensor<string, []>("op_293_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_294_cast_fp16 = softmax(axis = var_124, x = aw_29_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_295_cast_fp16 = softmax(axis = var_124, x = aw_31_cast_fp16)[name = tensor<string, []>("op_295_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_296_cast_fp16 = softmax(axis = var_124, x = aw_33_cast_fp16)[name = tensor<string, []>("op_296_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_297_cast_fp16 = softmax(axis = var_124, x = aw_35_cast_fp16)[name = tensor<string, []>("op_297_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_298_cast_fp16 = softmax(axis = var_124, x = aw_37_cast_fp16)[name = tensor<string, []>("op_298_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_299_cast_fp16 = softmax(axis = var_124, x = aw_39_cast_fp16)[name = tensor<string, []>("op_299_cast_fp16")];
+            tensor<string, []> var_301_equation_0 = const()[name = tensor<string, []>("op_301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_301_cast_fp16 = einsum(equation = var_301_equation_0, values = (var_219_cast_fp16_0, var_280_cast_fp16))[name = tensor<string, []>("op_301_cast_fp16")];
+            tensor<string, []> var_303_equation_0 = const()[name = tensor<string, []>("op_303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_303_cast_fp16 = einsum(equation = var_303_equation_0, values = (var_219_cast_fp16_1, var_281_cast_fp16))[name = tensor<string, []>("op_303_cast_fp16")];
+            tensor<string, []> var_305_equation_0 = const()[name = tensor<string, []>("op_305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_305_cast_fp16 = einsum(equation = var_305_equation_0, values = (var_219_cast_fp16_2, var_282_cast_fp16))[name = tensor<string, []>("op_305_cast_fp16")];
+            tensor<string, []> var_307_equation_0 = const()[name = tensor<string, []>("op_307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_307_cast_fp16 = einsum(equation = var_307_equation_0, values = (var_219_cast_fp16_3, var_283_cast_fp16))[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<string, []> var_309_equation_0 = const()[name = tensor<string, []>("op_309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_309_cast_fp16 = einsum(equation = var_309_equation_0, values = (var_219_cast_fp16_4, var_284_cast_fp16))[name = tensor<string, []>("op_309_cast_fp16")];
+            tensor<string, []> var_311_equation_0 = const()[name = tensor<string, []>("op_311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_311_cast_fp16 = einsum(equation = var_311_equation_0, values = (var_219_cast_fp16_5, var_285_cast_fp16))[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<string, []> var_313_equation_0 = const()[name = tensor<string, []>("op_313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_313_cast_fp16 = einsum(equation = var_313_equation_0, values = (var_219_cast_fp16_6, var_286_cast_fp16))[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<string, []> var_315_equation_0 = const()[name = tensor<string, []>("op_315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_315_cast_fp16 = einsum(equation = var_315_equation_0, values = (var_219_cast_fp16_7, var_287_cast_fp16))[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<string, []> var_317_equation_0 = const()[name = tensor<string, []>("op_317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_317_cast_fp16 = einsum(equation = var_317_equation_0, values = (var_219_cast_fp16_8, var_288_cast_fp16))[name = tensor<string, []>("op_317_cast_fp16")];
+            tensor<string, []> var_319_equation_0 = const()[name = tensor<string, []>("op_319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_319_cast_fp16 = einsum(equation = var_319_equation_0, values = (var_219_cast_fp16_9, var_289_cast_fp16))[name = tensor<string, []>("op_319_cast_fp16")];
+            tensor<string, []> var_321_equation_0 = const()[name = tensor<string, []>("op_321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_321_cast_fp16 = einsum(equation = var_321_equation_0, values = (var_219_cast_fp16_10, var_290_cast_fp16))[name = tensor<string, []>("op_321_cast_fp16")];
+            tensor<string, []> var_323_equation_0 = const()[name = tensor<string, []>("op_323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_323_cast_fp16 = einsum(equation = var_323_equation_0, values = (var_219_cast_fp16_11, var_291_cast_fp16))[name = tensor<string, []>("op_323_cast_fp16")];
+            tensor<string, []> var_325_equation_0 = const()[name = tensor<string, []>("op_325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_325_cast_fp16 = einsum(equation = var_325_equation_0, values = (var_219_cast_fp16_12, var_292_cast_fp16))[name = tensor<string, []>("op_325_cast_fp16")];
+            tensor<string, []> var_327_equation_0 = const()[name = tensor<string, []>("op_327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_327_cast_fp16 = einsum(equation = var_327_equation_0, values = (var_219_cast_fp16_13, var_293_cast_fp16))[name = tensor<string, []>("op_327_cast_fp16")];
+            tensor<string, []> var_329_equation_0 = const()[name = tensor<string, []>("op_329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_329_cast_fp16 = einsum(equation = var_329_equation_0, values = (var_219_cast_fp16_14, var_294_cast_fp16))[name = tensor<string, []>("op_329_cast_fp16")];
+            tensor<string, []> var_331_equation_0 = const()[name = tensor<string, []>("op_331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_331_cast_fp16 = einsum(equation = var_331_equation_0, values = (var_219_cast_fp16_15, var_295_cast_fp16))[name = tensor<string, []>("op_331_cast_fp16")];
+            tensor<string, []> var_333_equation_0 = const()[name = tensor<string, []>("op_333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_333_cast_fp16 = einsum(equation = var_333_equation_0, values = (var_219_cast_fp16_16, var_296_cast_fp16))[name = tensor<string, []>("op_333_cast_fp16")];
+            tensor<string, []> var_335_equation_0 = const()[name = tensor<string, []>("op_335_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_335_cast_fp16 = einsum(equation = var_335_equation_0, values = (var_219_cast_fp16_17, var_297_cast_fp16))[name = tensor<string, []>("op_335_cast_fp16")];
+            tensor<string, []> var_337_equation_0 = const()[name = tensor<string, []>("op_337_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_337_cast_fp16 = einsum(equation = var_337_equation_0, values = (var_219_cast_fp16_18, var_298_cast_fp16))[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<string, []> var_339_equation_0 = const()[name = tensor<string, []>("op_339_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_339_cast_fp16 = einsum(equation = var_339_equation_0, values = (var_219_cast_fp16_19, var_299_cast_fp16))[name = tensor<string, []>("op_339_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_5_cast_fp16 = concat(axis = var_124, interleave = input_5_interleave_0, values = (var_301_cast_fp16, var_303_cast_fp16, var_305_cast_fp16, var_307_cast_fp16, var_309_cast_fp16, var_311_cast_fp16, var_313_cast_fp16, var_315_cast_fp16, var_317_cast_fp16, var_319_cast_fp16, var_321_cast_fp16, var_323_cast_fp16, var_325_cast_fp16, var_327_cast_fp16, var_329_cast_fp16, var_331_cast_fp16, var_333_cast_fp16, var_335_cast_fp16, var_337_cast_fp16, var_339_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_348_pad_type_0 = const()[name = tensor<string, []>("op_348_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_348_strides_0 = const()[name = tensor<string, []>("op_348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_348_pad_0 = const()[name = tensor<string, []>("op_348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_348_dilations_0 = const()[name = tensor<string, []>("op_348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_348_groups_0 = const()[name = tensor<string, []>("op_348_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24500032)))];
+            tensor<fp16, [1280]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27776896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_348_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_348_dilations_0, groups = var_348_groups_0, pad = var_348_pad_0, pad_type = var_348_pad_type_0, strides = var_348_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_348_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_348_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27779520)))];
+            tensor<fp16, [1280]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27782144)))];
+            tensor<fp16, []> var_358_to_fp16 = const()[name = tensor<string, []>("op_358_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_358_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(27784768)))];
+            tensor<fp16, [5120]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40892032)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_384_pad_type_0 = const()[name = tensor<string, []>("op_384_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_384_strides_0 = const()[name = tensor<string, []>("op_384_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_384_pad_0 = const()[name = tensor<string, []>("op_384_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_384_dilations_0 = const()[name = tensor<string, []>("op_384_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_384_groups_0 = const()[name = tensor<string, []>("op_384_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40902336)))];
+            tensor<fp16, [1280]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54009600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_384_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_384_dilations_0, groups = var_384_groups_0, pad = var_384_pad_0, pad_type = var_384_pad_type_0, strides = var_384_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_384_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_384_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_393 = const()[name = tensor<string, []>("op_393"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54012224)))];
+            tensor<fp16, [1280]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54014848)))];
+            tensor<fp16, []> var_409_to_fp16 = const()[name = tensor<string, []>("op_409_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_409_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_444_weight_0_to_fp16 = const()[name = tensor<string, []>("op_444_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54017472)))];
+            tensor<fp16, [1280]> var_444_bias_0_to_fp16 = const()[name = tensor<string, []>("op_444_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57294336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_444_cast_fp16 = conv(bias = var_444_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_444_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_444_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57296960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_442_pad_type_0 = const()[name = tensor<string, []>("op_442_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_442_strides_0 = const()[name = tensor<string, []>("op_442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_442_pad_0 = const()[name = tensor<string, []>("op_442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_442_dilations_0 = const()[name = tensor<string, []>("op_442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_442_groups_0 = const()[name = tensor<string, []>("op_442_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60573824)))];
+            tensor<fp16, [1280]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63850688)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_442_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
+            tensor<int32, [20]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_445_axis_0 = const()[name = tensor<string, []>("op_445_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_445_cast_fp16_19 = split(axis = var_445_axis_0, split_sizes = tile_3, x = var_444_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
+            tensor<int32, [4]> var_466_perm_0 = const()[name = tensor<string, []>("op_466_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_467_axis_0 = const()[name = tensor<string, []>("op_467_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_466_cast_fp16 = transpose(perm = var_466_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_31")];
+            tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_467_cast_fp16_19 = split(axis = var_467_axis_0, split_sizes = tile_4, x = var_466_cast_fp16)[name = tensor<string, []>("op_467_cast_fp16")];
+            tensor<int32, [20]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_488_axis_0 = const()[name = tensor<string, []>("op_488_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_488_cast_fp16_19 = split(axis = var_488_axis_0, split_sizes = tile_5, x = var_442_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_467_cast_fp16_0, var_445_cast_fp16_0))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_467_cast_fp16_1, var_445_cast_fp16_1))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_467_cast_fp16_2, var_445_cast_fp16_2))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_467_cast_fp16_3, var_445_cast_fp16_3))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_467_cast_fp16_4, var_445_cast_fp16_4))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_467_cast_fp16_5, var_445_cast_fp16_5))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_467_cast_fp16_6, var_445_cast_fp16_6))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_467_cast_fp16_7, var_445_cast_fp16_7))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_467_cast_fp16_8, var_445_cast_fp16_8))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_467_cast_fp16_9, var_445_cast_fp16_9))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_467_cast_fp16_10, var_445_cast_fp16_10))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_467_cast_fp16_11, var_445_cast_fp16_11))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_467_cast_fp16_12, var_445_cast_fp16_12))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_467_cast_fp16_13, var_445_cast_fp16_13))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_467_cast_fp16_14, var_445_cast_fp16_14))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_467_cast_fp16_15, var_445_cast_fp16_15))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_467_cast_fp16_16, var_445_cast_fp16_16))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_467_cast_fp16_17, var_445_cast_fp16_17))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_467_cast_fp16_18, var_445_cast_fp16_18))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_467_cast_fp16_19, var_445_cast_fp16_19))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_549_cast_fp16 = softmax(axis = var_393, x = aw_41_cast_fp16)[name = tensor<string, []>("op_549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_550_cast_fp16 = softmax(axis = var_393, x = aw_43_cast_fp16)[name = tensor<string, []>("op_550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_551_cast_fp16 = softmax(axis = var_393, x = aw_45_cast_fp16)[name = tensor<string, []>("op_551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_552_cast_fp16 = softmax(axis = var_393, x = aw_47_cast_fp16)[name = tensor<string, []>("op_552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_553_cast_fp16 = softmax(axis = var_393, x = aw_49_cast_fp16)[name = tensor<string, []>("op_553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_554_cast_fp16 = softmax(axis = var_393, x = aw_51_cast_fp16)[name = tensor<string, []>("op_554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_555_cast_fp16 = softmax(axis = var_393, x = aw_53_cast_fp16)[name = tensor<string, []>("op_555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_556_cast_fp16 = softmax(axis = var_393, x = aw_55_cast_fp16)[name = tensor<string, []>("op_556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_557_cast_fp16 = softmax(axis = var_393, x = aw_57_cast_fp16)[name = tensor<string, []>("op_557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_558_cast_fp16 = softmax(axis = var_393, x = aw_59_cast_fp16)[name = tensor<string, []>("op_558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_559_cast_fp16 = softmax(axis = var_393, x = aw_61_cast_fp16)[name = tensor<string, []>("op_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_560_cast_fp16 = softmax(axis = var_393, x = aw_63_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_561_cast_fp16 = softmax(axis = var_393, x = aw_65_cast_fp16)[name = tensor<string, []>("op_561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_562_cast_fp16 = softmax(axis = var_393, x = aw_67_cast_fp16)[name = tensor<string, []>("op_562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_563_cast_fp16 = softmax(axis = var_393, x = aw_69_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_564_cast_fp16 = softmax(axis = var_393, x = aw_71_cast_fp16)[name = tensor<string, []>("op_564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_565_cast_fp16 = softmax(axis = var_393, x = aw_73_cast_fp16)[name = tensor<string, []>("op_565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_566_cast_fp16 = softmax(axis = var_393, x = aw_75_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_567_cast_fp16 = softmax(axis = var_393, x = aw_77_cast_fp16)[name = tensor<string, []>("op_567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_568_cast_fp16 = softmax(axis = var_393, x = aw_79_cast_fp16)[name = tensor<string, []>("op_568_cast_fp16")];
+            tensor<string, []> var_570_equation_0 = const()[name = tensor<string, []>("op_570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_570_cast_fp16 = einsum(equation = var_570_equation_0, values = (var_488_cast_fp16_0, var_549_cast_fp16))[name = tensor<string, []>("op_570_cast_fp16")];
+            tensor<string, []> var_572_equation_0 = const()[name = tensor<string, []>("op_572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_572_cast_fp16 = einsum(equation = var_572_equation_0, values = (var_488_cast_fp16_1, var_550_cast_fp16))[name = tensor<string, []>("op_572_cast_fp16")];
+            tensor<string, []> var_574_equation_0 = const()[name = tensor<string, []>("op_574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_574_cast_fp16 = einsum(equation = var_574_equation_0, values = (var_488_cast_fp16_2, var_551_cast_fp16))[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<string, []> var_576_equation_0 = const()[name = tensor<string, []>("op_576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_576_cast_fp16 = einsum(equation = var_576_equation_0, values = (var_488_cast_fp16_3, var_552_cast_fp16))[name = tensor<string, []>("op_576_cast_fp16")];
+            tensor<string, []> var_578_equation_0 = const()[name = tensor<string, []>("op_578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_578_cast_fp16 = einsum(equation = var_578_equation_0, values = (var_488_cast_fp16_4, var_553_cast_fp16))[name = tensor<string, []>("op_578_cast_fp16")];
+            tensor<string, []> var_580_equation_0 = const()[name = tensor<string, []>("op_580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_580_cast_fp16 = einsum(equation = var_580_equation_0, values = (var_488_cast_fp16_5, var_554_cast_fp16))[name = tensor<string, []>("op_580_cast_fp16")];
+            tensor<string, []> var_582_equation_0 = const()[name = tensor<string, []>("op_582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_582_cast_fp16 = einsum(equation = var_582_equation_0, values = (var_488_cast_fp16_6, var_555_cast_fp16))[name = tensor<string, []>("op_582_cast_fp16")];
+            tensor<string, []> var_584_equation_0 = const()[name = tensor<string, []>("op_584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_584_cast_fp16 = einsum(equation = var_584_equation_0, values = (var_488_cast_fp16_7, var_556_cast_fp16))[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<string, []> var_586_equation_0 = const()[name = tensor<string, []>("op_586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_586_cast_fp16 = einsum(equation = var_586_equation_0, values = (var_488_cast_fp16_8, var_557_cast_fp16))[name = tensor<string, []>("op_586_cast_fp16")];
+            tensor<string, []> var_588_equation_0 = const()[name = tensor<string, []>("op_588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_588_cast_fp16 = einsum(equation = var_588_equation_0, values = (var_488_cast_fp16_9, var_558_cast_fp16))[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<string, []> var_590_equation_0 = const()[name = tensor<string, []>("op_590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_590_cast_fp16 = einsum(equation = var_590_equation_0, values = (var_488_cast_fp16_10, var_559_cast_fp16))[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<string, []> var_592_equation_0 = const()[name = tensor<string, []>("op_592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_592_cast_fp16 = einsum(equation = var_592_equation_0, values = (var_488_cast_fp16_11, var_560_cast_fp16))[name = tensor<string, []>("op_592_cast_fp16")];
+            tensor<string, []> var_594_equation_0 = const()[name = tensor<string, []>("op_594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_594_cast_fp16 = einsum(equation = var_594_equation_0, values = (var_488_cast_fp16_12, var_561_cast_fp16))[name = tensor<string, []>("op_594_cast_fp16")];
+            tensor<string, []> var_596_equation_0 = const()[name = tensor<string, []>("op_596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_596_cast_fp16 = einsum(equation = var_596_equation_0, values = (var_488_cast_fp16_13, var_562_cast_fp16))[name = tensor<string, []>("op_596_cast_fp16")];
+            tensor<string, []> var_598_equation_0 = const()[name = tensor<string, []>("op_598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_598_cast_fp16 = einsum(equation = var_598_equation_0, values = (var_488_cast_fp16_14, var_563_cast_fp16))[name = tensor<string, []>("op_598_cast_fp16")];
+            tensor<string, []> var_600_equation_0 = const()[name = tensor<string, []>("op_600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_600_cast_fp16 = einsum(equation = var_600_equation_0, values = (var_488_cast_fp16_15, var_564_cast_fp16))[name = tensor<string, []>("op_600_cast_fp16")];
+            tensor<string, []> var_602_equation_0 = const()[name = tensor<string, []>("op_602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_602_cast_fp16 = einsum(equation = var_602_equation_0, values = (var_488_cast_fp16_16, var_565_cast_fp16))[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<string, []> var_604_equation_0 = const()[name = tensor<string, []>("op_604_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_604_cast_fp16 = einsum(equation = var_604_equation_0, values = (var_488_cast_fp16_17, var_566_cast_fp16))[name = tensor<string, []>("op_604_cast_fp16")];
+            tensor<string, []> var_606_equation_0 = const()[name = tensor<string, []>("op_606_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16 = einsum(equation = var_606_equation_0, values = (var_488_cast_fp16_18, var_567_cast_fp16))[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<string, []> var_608_equation_0 = const()[name = tensor<string, []>("op_608_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_608_cast_fp16 = einsum(equation = var_608_equation_0, values = (var_488_cast_fp16_19, var_568_cast_fp16))[name = tensor<string, []>("op_608_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_15_cast_fp16 = concat(axis = var_393, interleave = input_15_interleave_0, values = (var_570_cast_fp16, var_572_cast_fp16, var_574_cast_fp16, var_576_cast_fp16, var_578_cast_fp16, var_580_cast_fp16, var_582_cast_fp16, var_584_cast_fp16, var_586_cast_fp16, var_588_cast_fp16, var_590_cast_fp16, var_592_cast_fp16, var_594_cast_fp16, var_596_cast_fp16, var_598_cast_fp16, var_600_cast_fp16, var_602_cast_fp16, var_604_cast_fp16, var_606_cast_fp16, var_608_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_617_pad_type_0 = const()[name = tensor<string, []>("op_617_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_617_strides_0 = const()[name = tensor<string, []>("op_617_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_617_pad_0 = const()[name = tensor<string, []>("op_617_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_617_dilations_0 = const()[name = tensor<string, []>("op_617_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_617_groups_0 = const()[name = tensor<string, []>("op_617_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63853312)))];
+            tensor<fp16, [1280]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67130176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_617_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_617_dilations_0, groups = var_617_groups_0, pad = var_617_pad_0, pad_type = var_617_pad_type_0, strides = var_617_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_617_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67132800)))];
+            tensor<fp16, [1280]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67135424)))];
+            tensor<fp16, []> var_627_to_fp16 = const()[name = tensor<string, []>("op_627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_627_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67138048)))];
+            tensor<fp16, [5120]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80245312)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_653_pad_type_0 = const()[name = tensor<string, []>("op_653_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_653_strides_0 = const()[name = tensor<string, []>("op_653_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_653_pad_0 = const()[name = tensor<string, []>("op_653_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_653_dilations_0 = const()[name = tensor<string, []>("op_653_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_653_groups_0 = const()[name = tensor<string, []>("op_653_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80255616)))];
+            tensor<fp16, [1280]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93362880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_653_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_653_dilations_0, groups = var_653_groups_0, pad = var_653_pad_0, pad_type = var_653_pad_type_0, strides = var_653_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_653_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_662 = const()[name = tensor<string, []>("op_662"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93365504)))];
+            tensor<fp16, [1280]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93368128)))];
+            tensor<fp16, []> var_678_to_fp16 = const()[name = tensor<string, []>("op_678_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_678_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_713_weight_0_to_fp16 = const()[name = tensor<string, []>("op_713_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93370752)))];
+            tensor<fp16, [1280]> var_713_bias_0_to_fp16 = const()[name = tensor<string, []>("op_713_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96647616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_713_cast_fp16 = conv(bias = var_713_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_713_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(96650240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_711_pad_type_0 = const()[name = tensor<string, []>("op_711_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_711_strides_0 = const()[name = tensor<string, []>("op_711_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_711_pad_0 = const()[name = tensor<string, []>("op_711_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_711_dilations_0 = const()[name = tensor<string, []>("op_711_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_711_groups_0 = const()[name = tensor<string, []>("op_711_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(99927104)))];
+            tensor<fp16, [1280]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103203968)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_711_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_711_dilations_0, groups = var_711_groups_0, pad = var_711_pad_0, pad_type = var_711_pad_type_0, strides = var_711_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_711_cast_fp16")];
+            tensor<int32, [20]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_714_axis_0 = const()[name = tensor<string, []>("op_714_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_714_cast_fp16_19 = split(axis = var_714_axis_0, split_sizes = tile_6, x = var_713_cast_fp16)[name = tensor<string, []>("op_714_cast_fp16")];
+            tensor<int32, [4]> var_735_perm_0 = const()[name = tensor<string, []>("op_735_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_736_axis_0 = const()[name = tensor<string, []>("op_736_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_735_cast_fp16 = transpose(perm = var_735_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_30")];
+            tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_736_cast_fp16_19 = split(axis = var_736_axis_0, split_sizes = tile_7, x = var_735_cast_fp16)[name = tensor<string, []>("op_736_cast_fp16")];
+            tensor<int32, [20]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_757_axis_0 = const()[name = tensor<string, []>("op_757_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16_19 = split(axis = var_757_axis_0, split_sizes = tile_8, x = var_711_cast_fp16)[name = tensor<string, []>("op_757_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_736_cast_fp16_0, var_714_cast_fp16_0))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_736_cast_fp16_1, var_714_cast_fp16_1))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_736_cast_fp16_2, var_714_cast_fp16_2))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_736_cast_fp16_3, var_714_cast_fp16_3))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_736_cast_fp16_4, var_714_cast_fp16_4))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_736_cast_fp16_5, var_714_cast_fp16_5))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_736_cast_fp16_6, var_714_cast_fp16_6))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_736_cast_fp16_7, var_714_cast_fp16_7))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_736_cast_fp16_8, var_714_cast_fp16_8))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_736_cast_fp16_9, var_714_cast_fp16_9))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_736_cast_fp16_10, var_714_cast_fp16_10))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_736_cast_fp16_11, var_714_cast_fp16_11))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_736_cast_fp16_12, var_714_cast_fp16_12))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_736_cast_fp16_13, var_714_cast_fp16_13))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_736_cast_fp16_14, var_714_cast_fp16_14))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_736_cast_fp16_15, var_714_cast_fp16_15))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_736_cast_fp16_16, var_714_cast_fp16_16))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_736_cast_fp16_17, var_714_cast_fp16_17))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_736_cast_fp16_18, var_714_cast_fp16_18))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_736_cast_fp16_19, var_714_cast_fp16_19))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_818_cast_fp16 = softmax(axis = var_662, x = aw_81_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_819_cast_fp16 = softmax(axis = var_662, x = aw_83_cast_fp16)[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_820_cast_fp16 = softmax(axis = var_662, x = aw_85_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_821_cast_fp16 = softmax(axis = var_662, x = aw_87_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_822_cast_fp16 = softmax(axis = var_662, x = aw_89_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_823_cast_fp16 = softmax(axis = var_662, x = aw_91_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_824_cast_fp16 = softmax(axis = var_662, x = aw_93_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_825_cast_fp16 = softmax(axis = var_662, x = aw_95_cast_fp16)[name = tensor<string, []>("op_825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_826_cast_fp16 = softmax(axis = var_662, x = aw_97_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_827_cast_fp16 = softmax(axis = var_662, x = aw_99_cast_fp16)[name = tensor<string, []>("op_827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_828_cast_fp16 = softmax(axis = var_662, x = aw_101_cast_fp16)[name = tensor<string, []>("op_828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_829_cast_fp16 = softmax(axis = var_662, x = aw_103_cast_fp16)[name = tensor<string, []>("op_829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_830_cast_fp16 = softmax(axis = var_662, x = aw_105_cast_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_831_cast_fp16 = softmax(axis = var_662, x = aw_107_cast_fp16)[name = tensor<string, []>("op_831_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_832_cast_fp16 = softmax(axis = var_662, x = aw_109_cast_fp16)[name = tensor<string, []>("op_832_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_833_cast_fp16 = softmax(axis = var_662, x = aw_111_cast_fp16)[name = tensor<string, []>("op_833_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_834_cast_fp16 = softmax(axis = var_662, x = aw_113_cast_fp16)[name = tensor<string, []>("op_834_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_835_cast_fp16 = softmax(axis = var_662, x = aw_115_cast_fp16)[name = tensor<string, []>("op_835_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_836_cast_fp16 = softmax(axis = var_662, x = aw_117_cast_fp16)[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_837_cast_fp16 = softmax(axis = var_662, x = aw_119_cast_fp16)[name = tensor<string, []>("op_837_cast_fp16")];
+            tensor<string, []> var_839_equation_0 = const()[name = tensor<string, []>("op_839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_839_cast_fp16 = einsum(equation = var_839_equation_0, values = (var_757_cast_fp16_0, var_818_cast_fp16))[name = tensor<string, []>("op_839_cast_fp16")];
+            tensor<string, []> var_841_equation_0 = const()[name = tensor<string, []>("op_841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_841_cast_fp16 = einsum(equation = var_841_equation_0, values = (var_757_cast_fp16_1, var_819_cast_fp16))[name = tensor<string, []>("op_841_cast_fp16")];
+            tensor<string, []> var_843_equation_0 = const()[name = tensor<string, []>("op_843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_843_cast_fp16 = einsum(equation = var_843_equation_0, values = (var_757_cast_fp16_2, var_820_cast_fp16))[name = tensor<string, []>("op_843_cast_fp16")];
+            tensor<string, []> var_845_equation_0 = const()[name = tensor<string, []>("op_845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_845_cast_fp16 = einsum(equation = var_845_equation_0, values = (var_757_cast_fp16_3, var_821_cast_fp16))[name = tensor<string, []>("op_845_cast_fp16")];
+            tensor<string, []> var_847_equation_0 = const()[name = tensor<string, []>("op_847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_847_cast_fp16 = einsum(equation = var_847_equation_0, values = (var_757_cast_fp16_4, var_822_cast_fp16))[name = tensor<string, []>("op_847_cast_fp16")];
+            tensor<string, []> var_849_equation_0 = const()[name = tensor<string, []>("op_849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_849_cast_fp16 = einsum(equation = var_849_equation_0, values = (var_757_cast_fp16_5, var_823_cast_fp16))[name = tensor<string, []>("op_849_cast_fp16")];
+            tensor<string, []> var_851_equation_0 = const()[name = tensor<string, []>("op_851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_851_cast_fp16 = einsum(equation = var_851_equation_0, values = (var_757_cast_fp16_6, var_824_cast_fp16))[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<string, []> var_853_equation_0 = const()[name = tensor<string, []>("op_853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_853_cast_fp16 = einsum(equation = var_853_equation_0, values = (var_757_cast_fp16_7, var_825_cast_fp16))[name = tensor<string, []>("op_853_cast_fp16")];
+            tensor<string, []> var_855_equation_0 = const()[name = tensor<string, []>("op_855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_855_cast_fp16 = einsum(equation = var_855_equation_0, values = (var_757_cast_fp16_8, var_826_cast_fp16))[name = tensor<string, []>("op_855_cast_fp16")];
+            tensor<string, []> var_857_equation_0 = const()[name = tensor<string, []>("op_857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_857_cast_fp16 = einsum(equation = var_857_equation_0, values = (var_757_cast_fp16_9, var_827_cast_fp16))[name = tensor<string, []>("op_857_cast_fp16")];
+            tensor<string, []> var_859_equation_0 = const()[name = tensor<string, []>("op_859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_859_cast_fp16 = einsum(equation = var_859_equation_0, values = (var_757_cast_fp16_10, var_828_cast_fp16))[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<string, []> var_861_equation_0 = const()[name = tensor<string, []>("op_861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_861_cast_fp16 = einsum(equation = var_861_equation_0, values = (var_757_cast_fp16_11, var_829_cast_fp16))[name = tensor<string, []>("op_861_cast_fp16")];
+            tensor<string, []> var_863_equation_0 = const()[name = tensor<string, []>("op_863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_863_cast_fp16 = einsum(equation = var_863_equation_0, values = (var_757_cast_fp16_12, var_830_cast_fp16))[name = tensor<string, []>("op_863_cast_fp16")];
+            tensor<string, []> var_865_equation_0 = const()[name = tensor<string, []>("op_865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_865_cast_fp16 = einsum(equation = var_865_equation_0, values = (var_757_cast_fp16_13, var_831_cast_fp16))[name = tensor<string, []>("op_865_cast_fp16")];
+            tensor<string, []> var_867_equation_0 = const()[name = tensor<string, []>("op_867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_867_cast_fp16 = einsum(equation = var_867_equation_0, values = (var_757_cast_fp16_14, var_832_cast_fp16))[name = tensor<string, []>("op_867_cast_fp16")];
+            tensor<string, []> var_869_equation_0 = const()[name = tensor<string, []>("op_869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_869_cast_fp16 = einsum(equation = var_869_equation_0, values = (var_757_cast_fp16_15, var_833_cast_fp16))[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<string, []> var_871_equation_0 = const()[name = tensor<string, []>("op_871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16 = einsum(equation = var_871_equation_0, values = (var_757_cast_fp16_16, var_834_cast_fp16))[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<string, []> var_873_equation_0 = const()[name = tensor<string, []>("op_873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_873_cast_fp16 = einsum(equation = var_873_equation_0, values = (var_757_cast_fp16_17, var_835_cast_fp16))[name = tensor<string, []>("op_873_cast_fp16")];
+            tensor<string, []> var_875_equation_0 = const()[name = tensor<string, []>("op_875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_875_cast_fp16 = einsum(equation = var_875_equation_0, values = (var_757_cast_fp16_18, var_836_cast_fp16))[name = tensor<string, []>("op_875_cast_fp16")];
+            tensor<string, []> var_877_equation_0 = const()[name = tensor<string, []>("op_877_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_877_cast_fp16 = einsum(equation = var_877_equation_0, values = (var_757_cast_fp16_19, var_837_cast_fp16))[name = tensor<string, []>("op_877_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_25_cast_fp16 = concat(axis = var_662, interleave = input_25_interleave_0, values = (var_839_cast_fp16, var_841_cast_fp16, var_843_cast_fp16, var_845_cast_fp16, var_847_cast_fp16, var_849_cast_fp16, var_851_cast_fp16, var_853_cast_fp16, var_855_cast_fp16, var_857_cast_fp16, var_859_cast_fp16, var_861_cast_fp16, var_863_cast_fp16, var_865_cast_fp16, var_867_cast_fp16, var_869_cast_fp16, var_871_cast_fp16, var_873_cast_fp16, var_875_cast_fp16, var_877_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_886_pad_type_0 = const()[name = tensor<string, []>("op_886_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_886_strides_0 = const()[name = tensor<string, []>("op_886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_886_pad_0 = const()[name = tensor<string, []>("op_886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_886_dilations_0 = const()[name = tensor<string, []>("op_886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_886_groups_0 = const()[name = tensor<string, []>("op_886_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(103206592)))];
+            tensor<fp16, [1280]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106483456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_886_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_886_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_886_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106486080)))];
+            tensor<fp16, [1280]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106488704)))];
+            tensor<fp16, []> var_896_to_fp16 = const()[name = tensor<string, []>("op_896_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_896_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106491328)))];
+            tensor<fp16, [5120]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119598592)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_922_pad_type_0 = const()[name = tensor<string, []>("op_922_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_922_strides_0 = const()[name = tensor<string, []>("op_922_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_922_pad_0 = const()[name = tensor<string, []>("op_922_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_922_dilations_0 = const()[name = tensor<string, []>("op_922_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_922_groups_0 = const()[name = tensor<string, []>("op_922_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119608896)))];
+            tensor<fp16, [1280]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132716160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_922_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_922_dilations_0, groups = var_922_groups_0, pad = var_922_pad_0, pad_type = var_922_pad_type_0, strides = var_922_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_922_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_922_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_931 = const()[name = tensor<string, []>("op_931"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132718784)))];
+            tensor<fp16, [1280]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132721408)))];
+            tensor<fp16, []> var_947_to_fp16 = const()[name = tensor<string, []>("op_947_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_947_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_982_weight_0_to_fp16 = const()[name = tensor<string, []>("op_982_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(132724032)))];
+            tensor<fp16, [1280]> var_982_bias_0_to_fp16 = const()[name = tensor<string, []>("op_982_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(136000896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_982_cast_fp16 = conv(bias = var_982_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_982_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(136003520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_980_pad_type_0 = const()[name = tensor<string, []>("op_980_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_980_strides_0 = const()[name = tensor<string, []>("op_980_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_980_pad_0 = const()[name = tensor<string, []>("op_980_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_980_dilations_0 = const()[name = tensor<string, []>("op_980_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_980_groups_0 = const()[name = tensor<string, []>("op_980_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(139280384)))];
+            tensor<fp16, [1280]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142557248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_980_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_980_dilations_0, groups = var_980_groups_0, pad = var_980_pad_0, pad_type = var_980_pad_type_0, strides = var_980_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<int32, [20]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_983_axis_0 = const()[name = tensor<string, []>("op_983_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_19 = split(axis = var_983_axis_0, split_sizes = tile_9, x = var_982_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<int32, [4]> var_1004_perm_0 = const()[name = tensor<string, []>("op_1004_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1005_axis_0 = const()[name = tensor<string, []>("op_1005_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1004_cast_fp16 = transpose(perm = var_1004_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_29")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1005_cast_fp16_19 = split(axis = var_1005_axis_0, split_sizes = tile_10, x = var_1004_cast_fp16)[name = tensor<string, []>("op_1005_cast_fp16")];
+            tensor<int32, [20]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1026_axis_0 = const()[name = tensor<string, []>("op_1026_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1026_cast_fp16_19 = split(axis = var_1026_axis_0, split_sizes = tile_11, x = var_980_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_1005_cast_fp16_0, var_983_cast_fp16_0))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_1005_cast_fp16_1, var_983_cast_fp16_1))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_1005_cast_fp16_2, var_983_cast_fp16_2))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_1005_cast_fp16_3, var_983_cast_fp16_3))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1005_cast_fp16_4, var_983_cast_fp16_4))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1005_cast_fp16_5, var_983_cast_fp16_5))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1005_cast_fp16_6, var_983_cast_fp16_6))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1005_cast_fp16_7, var_983_cast_fp16_7))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1005_cast_fp16_8, var_983_cast_fp16_8))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1005_cast_fp16_9, var_983_cast_fp16_9))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1005_cast_fp16_10, var_983_cast_fp16_10))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1005_cast_fp16_11, var_983_cast_fp16_11))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1005_cast_fp16_12, var_983_cast_fp16_12))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1005_cast_fp16_13, var_983_cast_fp16_13))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1005_cast_fp16_14, var_983_cast_fp16_14))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1005_cast_fp16_15, var_983_cast_fp16_15))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1005_cast_fp16_16, var_983_cast_fp16_16))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1005_cast_fp16_17, var_983_cast_fp16_17))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1005_cast_fp16_18, var_983_cast_fp16_18))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1005_cast_fp16_19, var_983_cast_fp16_19))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1087_cast_fp16 = softmax(axis = var_931, x = aw_121_cast_fp16)[name = tensor<string, []>("op_1087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1088_cast_fp16 = softmax(axis = var_931, x = aw_123_cast_fp16)[name = tensor<string, []>("op_1088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1089_cast_fp16 = softmax(axis = var_931, x = aw_125_cast_fp16)[name = tensor<string, []>("op_1089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1090_cast_fp16 = softmax(axis = var_931, x = aw_127_cast_fp16)[name = tensor<string, []>("op_1090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1091_cast_fp16 = softmax(axis = var_931, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1092_cast_fp16 = softmax(axis = var_931, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1093_cast_fp16 = softmax(axis = var_931, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1094_cast_fp16 = softmax(axis = var_931, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1095_cast_fp16 = softmax(axis = var_931, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1096_cast_fp16 = softmax(axis = var_931, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1097_cast_fp16 = softmax(axis = var_931, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1098_cast_fp16 = softmax(axis = var_931, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1099_cast_fp16 = softmax(axis = var_931, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1100_cast_fp16 = softmax(axis = var_931, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1101_cast_fp16 = softmax(axis = var_931, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1102_cast_fp16 = softmax(axis = var_931, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1102_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1103_cast_fp16 = softmax(axis = var_931, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1103_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1104_cast_fp16 = softmax(axis = var_931, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1104_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1105_cast_fp16 = softmax(axis = var_931, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1106_cast_fp16 = softmax(axis = var_931, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1106_cast_fp16")];
+            tensor<string, []> var_1108_equation_0 = const()[name = tensor<string, []>("op_1108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_1026_cast_fp16_0, var_1087_cast_fp16))[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<string, []> var_1110_equation_0 = const()[name = tensor<string, []>("op_1110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1110_cast_fp16 = einsum(equation = var_1110_equation_0, values = (var_1026_cast_fp16_1, var_1088_cast_fp16))[name = tensor<string, []>("op_1110_cast_fp16")];
+            tensor<string, []> var_1112_equation_0 = const()[name = tensor<string, []>("op_1112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_1026_cast_fp16_2, var_1089_cast_fp16))[name = tensor<string, []>("op_1112_cast_fp16")];
+            tensor<string, []> var_1114_equation_0 = const()[name = tensor<string, []>("op_1114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1114_cast_fp16 = einsum(equation = var_1114_equation_0, values = (var_1026_cast_fp16_3, var_1090_cast_fp16))[name = tensor<string, []>("op_1114_cast_fp16")];
+            tensor<string, []> var_1116_equation_0 = const()[name = tensor<string, []>("op_1116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_1026_cast_fp16_4, var_1091_cast_fp16))[name = tensor<string, []>("op_1116_cast_fp16")];
+            tensor<string, []> var_1118_equation_0 = const()[name = tensor<string, []>("op_1118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1118_cast_fp16 = einsum(equation = var_1118_equation_0, values = (var_1026_cast_fp16_5, var_1092_cast_fp16))[name = tensor<string, []>("op_1118_cast_fp16")];
+            tensor<string, []> var_1120_equation_0 = const()[name = tensor<string, []>("op_1120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_1026_cast_fp16_6, var_1093_cast_fp16))[name = tensor<string, []>("op_1120_cast_fp16")];
+            tensor<string, []> var_1122_equation_0 = const()[name = tensor<string, []>("op_1122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1122_cast_fp16 = einsum(equation = var_1122_equation_0, values = (var_1026_cast_fp16_7, var_1094_cast_fp16))[name = tensor<string, []>("op_1122_cast_fp16")];
+            tensor<string, []> var_1124_equation_0 = const()[name = tensor<string, []>("op_1124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_1026_cast_fp16_8, var_1095_cast_fp16))[name = tensor<string, []>("op_1124_cast_fp16")];
+            tensor<string, []> var_1126_equation_0 = const()[name = tensor<string, []>("op_1126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1126_cast_fp16 = einsum(equation = var_1126_equation_0, values = (var_1026_cast_fp16_9, var_1096_cast_fp16))[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<string, []> var_1128_equation_0 = const()[name = tensor<string, []>("op_1128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1128_cast_fp16 = einsum(equation = var_1128_equation_0, values = (var_1026_cast_fp16_10, var_1097_cast_fp16))[name = tensor<string, []>("op_1128_cast_fp16")];
+            tensor<string, []> var_1130_equation_0 = const()[name = tensor<string, []>("op_1130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1130_cast_fp16 = einsum(equation = var_1130_equation_0, values = (var_1026_cast_fp16_11, var_1098_cast_fp16))[name = tensor<string, []>("op_1130_cast_fp16")];
+            tensor<string, []> var_1132_equation_0 = const()[name = tensor<string, []>("op_1132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1132_cast_fp16 = einsum(equation = var_1132_equation_0, values = (var_1026_cast_fp16_12, var_1099_cast_fp16))[name = tensor<string, []>("op_1132_cast_fp16")];
+            tensor<string, []> var_1134_equation_0 = const()[name = tensor<string, []>("op_1134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1134_cast_fp16 = einsum(equation = var_1134_equation_0, values = (var_1026_cast_fp16_13, var_1100_cast_fp16))[name = tensor<string, []>("op_1134_cast_fp16")];
+            tensor<string, []> var_1136_equation_0 = const()[name = tensor<string, []>("op_1136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1136_cast_fp16 = einsum(equation = var_1136_equation_0, values = (var_1026_cast_fp16_14, var_1101_cast_fp16))[name = tensor<string, []>("op_1136_cast_fp16")];
+            tensor<string, []> var_1138_equation_0 = const()[name = tensor<string, []>("op_1138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1138_cast_fp16 = einsum(equation = var_1138_equation_0, values = (var_1026_cast_fp16_15, var_1102_cast_fp16))[name = tensor<string, []>("op_1138_cast_fp16")];
+            tensor<string, []> var_1140_equation_0 = const()[name = tensor<string, []>("op_1140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1140_cast_fp16 = einsum(equation = var_1140_equation_0, values = (var_1026_cast_fp16_16, var_1103_cast_fp16))[name = tensor<string, []>("op_1140_cast_fp16")];
+            tensor<string, []> var_1142_equation_0 = const()[name = tensor<string, []>("op_1142_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1142_cast_fp16 = einsum(equation = var_1142_equation_0, values = (var_1026_cast_fp16_17, var_1104_cast_fp16))[name = tensor<string, []>("op_1142_cast_fp16")];
+            tensor<string, []> var_1144_equation_0 = const()[name = tensor<string, []>("op_1144_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1144_cast_fp16 = einsum(equation = var_1144_equation_0, values = (var_1026_cast_fp16_18, var_1105_cast_fp16))[name = tensor<string, []>("op_1144_cast_fp16")];
+            tensor<string, []> var_1146_equation_0 = const()[name = tensor<string, []>("op_1146_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1146_cast_fp16 = einsum(equation = var_1146_equation_0, values = (var_1026_cast_fp16_19, var_1106_cast_fp16))[name = tensor<string, []>("op_1146_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_35_cast_fp16 = concat(axis = var_931, interleave = input_35_interleave_0, values = (var_1108_cast_fp16, var_1110_cast_fp16, var_1112_cast_fp16, var_1114_cast_fp16, var_1116_cast_fp16, var_1118_cast_fp16, var_1120_cast_fp16, var_1122_cast_fp16, var_1124_cast_fp16, var_1126_cast_fp16, var_1128_cast_fp16, var_1130_cast_fp16, var_1132_cast_fp16, var_1134_cast_fp16, var_1136_cast_fp16, var_1138_cast_fp16, var_1140_cast_fp16, var_1142_cast_fp16, var_1144_cast_fp16, var_1146_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_1155_pad_type_0 = const()[name = tensor<string, []>("op_1155_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1155_strides_0 = const()[name = tensor<string, []>("op_1155_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1155_pad_0 = const()[name = tensor<string, []>("op_1155_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1155_dilations_0 = const()[name = tensor<string, []>("op_1155_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1155_groups_0 = const()[name = tensor<string, []>("op_1155_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142559872)))];
+            tensor<fp16, [1280]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145836736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1155_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_1155_dilations_0, groups = var_1155_groups_0, pad = var_1155_pad_0, pad_type = var_1155_pad_type_0, strides = var_1155_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_1155_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_1155_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145839360)))];
+            tensor<fp16, [1280]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145841984)))];
+            tensor<fp16, []> var_1165_to_fp16 = const()[name = tensor<string, []>("op_1165_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_1165_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(145844608)))];
+            tensor<fp16, [5120]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158951872)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_1191_pad_type_0 = const()[name = tensor<string, []>("op_1191_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1191_strides_0 = const()[name = tensor<string, []>("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1191_pad_0 = const()[name = tensor<string, []>("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1191_dilations_0 = const()[name = tensor<string, []>("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1191_groups_0 = const()[name = tensor<string, []>("op_1191_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(158962176)))];
+            tensor<fp16, [1280]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172069440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1191_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_1191_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_1191_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_1200 = const()[name = tensor<string, []>("op_1200"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172072064)))];
+            tensor<fp16, [1280]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172074688)))];
+            tensor<fp16, []> var_1216_to_fp16 = const()[name = tensor<string, []>("op_1216_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_1216_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1251_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1251_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(172077312)))];
+            tensor<fp16, [1280]> var_1251_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1251_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(175354176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1251_cast_fp16 = conv(bias = var_1251_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_1251_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1251_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(175356800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_1249_pad_type_0 = const()[name = tensor<string, []>("op_1249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1249_strides_0 = const()[name = tensor<string, []>("op_1249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1249_pad_0 = const()[name = tensor<string, []>("op_1249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1249_dilations_0 = const()[name = tensor<string, []>("op_1249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1249_groups_0 = const()[name = tensor<string, []>("op_1249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(178633664)))];
+            tensor<fp16, [1280]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181910528)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1249_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_1249_dilations_0, groups = var_1249_groups_0, pad = var_1249_pad_0, pad_type = var_1249_pad_type_0, strides = var_1249_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1249_cast_fp16")];
+            tensor<int32, [20]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1252_axis_0 = const()[name = tensor<string, []>("op_1252_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16_19 = split(axis = var_1252_axis_0, split_sizes = tile_12, x = var_1251_cast_fp16)[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<int32, [4]> var_1273_perm_0 = const()[name = tensor<string, []>("op_1273_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1274_axis_0 = const()[name = tensor<string, []>("op_1274_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1273_cast_fp16 = transpose(perm = var_1273_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_28")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1274_cast_fp16_19 = split(axis = var_1274_axis_0, split_sizes = tile_13, x = var_1273_cast_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
+            tensor<int32, [20]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1295_axis_0 = const()[name = tensor<string, []>("op_1295_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1295_cast_fp16_19 = split(axis = var_1295_axis_0, split_sizes = tile_14, x = var_1249_cast_fp16)[name = tensor<string, []>("op_1295_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1274_cast_fp16_0, var_1252_cast_fp16_0))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1274_cast_fp16_1, var_1252_cast_fp16_1))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1274_cast_fp16_2, var_1252_cast_fp16_2))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1274_cast_fp16_3, var_1252_cast_fp16_3))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1274_cast_fp16_4, var_1252_cast_fp16_4))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1274_cast_fp16_5, var_1252_cast_fp16_5))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1274_cast_fp16_6, var_1252_cast_fp16_6))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1274_cast_fp16_7, var_1252_cast_fp16_7))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1274_cast_fp16_8, var_1252_cast_fp16_8))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1274_cast_fp16_9, var_1252_cast_fp16_9))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1274_cast_fp16_10, var_1252_cast_fp16_10))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1274_cast_fp16_11, var_1252_cast_fp16_11))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1274_cast_fp16_12, var_1252_cast_fp16_12))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1274_cast_fp16_13, var_1252_cast_fp16_13))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1274_cast_fp16_14, var_1252_cast_fp16_14))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1274_cast_fp16_15, var_1252_cast_fp16_15))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1274_cast_fp16_16, var_1252_cast_fp16_16))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1274_cast_fp16_17, var_1252_cast_fp16_17))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1274_cast_fp16_18, var_1252_cast_fp16_18))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1274_cast_fp16_19, var_1252_cast_fp16_19))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1356_cast_fp16 = softmax(axis = var_1200, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1357_cast_fp16 = softmax(axis = var_1200, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1358_cast_fp16 = softmax(axis = var_1200, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1359_cast_fp16 = softmax(axis = var_1200, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1360_cast_fp16 = softmax(axis = var_1200, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1361_cast_fp16 = softmax(axis = var_1200, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1362_cast_fp16 = softmax(axis = var_1200, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1363_cast_fp16 = softmax(axis = var_1200, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1364_cast_fp16 = softmax(axis = var_1200, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1365_cast_fp16 = softmax(axis = var_1200, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1366_cast_fp16 = softmax(axis = var_1200, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1367_cast_fp16 = softmax(axis = var_1200, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1368_cast_fp16 = softmax(axis = var_1200, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1369_cast_fp16 = softmax(axis = var_1200, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1369_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1370_cast_fp16 = softmax(axis = var_1200, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1370_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1371_cast_fp16 = softmax(axis = var_1200, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1371_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1372_cast_fp16 = softmax(axis = var_1200, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1372_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1373_cast_fp16 = softmax(axis = var_1200, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1373_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1374_cast_fp16 = softmax(axis = var_1200, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1374_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1375_cast_fp16 = softmax(axis = var_1200, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1375_cast_fp16")];
+            tensor<string, []> var_1377_equation_0 = const()[name = tensor<string, []>("op_1377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1377_cast_fp16 = einsum(equation = var_1377_equation_0, values = (var_1295_cast_fp16_0, var_1356_cast_fp16))[name = tensor<string, []>("op_1377_cast_fp16")];
+            tensor<string, []> var_1379_equation_0 = const()[name = tensor<string, []>("op_1379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1379_cast_fp16 = einsum(equation = var_1379_equation_0, values = (var_1295_cast_fp16_1, var_1357_cast_fp16))[name = tensor<string, []>("op_1379_cast_fp16")];
+            tensor<string, []> var_1381_equation_0 = const()[name = tensor<string, []>("op_1381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1381_cast_fp16 = einsum(equation = var_1381_equation_0, values = (var_1295_cast_fp16_2, var_1358_cast_fp16))[name = tensor<string, []>("op_1381_cast_fp16")];
+            tensor<string, []> var_1383_equation_0 = const()[name = tensor<string, []>("op_1383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1383_cast_fp16 = einsum(equation = var_1383_equation_0, values = (var_1295_cast_fp16_3, var_1359_cast_fp16))[name = tensor<string, []>("op_1383_cast_fp16")];
+            tensor<string, []> var_1385_equation_0 = const()[name = tensor<string, []>("op_1385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1385_cast_fp16 = einsum(equation = var_1385_equation_0, values = (var_1295_cast_fp16_4, var_1360_cast_fp16))[name = tensor<string, []>("op_1385_cast_fp16")];
+            tensor<string, []> var_1387_equation_0 = const()[name = tensor<string, []>("op_1387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1387_cast_fp16 = einsum(equation = var_1387_equation_0, values = (var_1295_cast_fp16_5, var_1361_cast_fp16))[name = tensor<string, []>("op_1387_cast_fp16")];
+            tensor<string, []> var_1389_equation_0 = const()[name = tensor<string, []>("op_1389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1389_cast_fp16 = einsum(equation = var_1389_equation_0, values = (var_1295_cast_fp16_6, var_1362_cast_fp16))[name = tensor<string, []>("op_1389_cast_fp16")];
+            tensor<string, []> var_1391_equation_0 = const()[name = tensor<string, []>("op_1391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1391_cast_fp16 = einsum(equation = var_1391_equation_0, values = (var_1295_cast_fp16_7, var_1363_cast_fp16))[name = tensor<string, []>("op_1391_cast_fp16")];
+            tensor<string, []> var_1393_equation_0 = const()[name = tensor<string, []>("op_1393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16 = einsum(equation = var_1393_equation_0, values = (var_1295_cast_fp16_8, var_1364_cast_fp16))[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<string, []> var_1395_equation_0 = const()[name = tensor<string, []>("op_1395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1395_cast_fp16 = einsum(equation = var_1395_equation_0, values = (var_1295_cast_fp16_9, var_1365_cast_fp16))[name = tensor<string, []>("op_1395_cast_fp16")];
+            tensor<string, []> var_1397_equation_0 = const()[name = tensor<string, []>("op_1397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1397_cast_fp16 = einsum(equation = var_1397_equation_0, values = (var_1295_cast_fp16_10, var_1366_cast_fp16))[name = tensor<string, []>("op_1397_cast_fp16")];
+            tensor<string, []> var_1399_equation_0 = const()[name = tensor<string, []>("op_1399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1399_cast_fp16 = einsum(equation = var_1399_equation_0, values = (var_1295_cast_fp16_11, var_1367_cast_fp16))[name = tensor<string, []>("op_1399_cast_fp16")];
+            tensor<string, []> var_1401_equation_0 = const()[name = tensor<string, []>("op_1401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1401_cast_fp16 = einsum(equation = var_1401_equation_0, values = (var_1295_cast_fp16_12, var_1368_cast_fp16))[name = tensor<string, []>("op_1401_cast_fp16")];
+            tensor<string, []> var_1403_equation_0 = const()[name = tensor<string, []>("op_1403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1403_cast_fp16 = einsum(equation = var_1403_equation_0, values = (var_1295_cast_fp16_13, var_1369_cast_fp16))[name = tensor<string, []>("op_1403_cast_fp16")];
+            tensor<string, []> var_1405_equation_0 = const()[name = tensor<string, []>("op_1405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1405_cast_fp16 = einsum(equation = var_1405_equation_0, values = (var_1295_cast_fp16_14, var_1370_cast_fp16))[name = tensor<string, []>("op_1405_cast_fp16")];
+            tensor<string, []> var_1407_equation_0 = const()[name = tensor<string, []>("op_1407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1407_cast_fp16 = einsum(equation = var_1407_equation_0, values = (var_1295_cast_fp16_15, var_1371_cast_fp16))[name = tensor<string, []>("op_1407_cast_fp16")];
+            tensor<string, []> var_1409_equation_0 = const()[name = tensor<string, []>("op_1409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1409_cast_fp16 = einsum(equation = var_1409_equation_0, values = (var_1295_cast_fp16_16, var_1372_cast_fp16))[name = tensor<string, []>("op_1409_cast_fp16")];
+            tensor<string, []> var_1411_equation_0 = const()[name = tensor<string, []>("op_1411_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1411_cast_fp16 = einsum(equation = var_1411_equation_0, values = (var_1295_cast_fp16_17, var_1373_cast_fp16))[name = tensor<string, []>("op_1411_cast_fp16")];
+            tensor<string, []> var_1413_equation_0 = const()[name = tensor<string, []>("op_1413_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1413_cast_fp16 = einsum(equation = var_1413_equation_0, values = (var_1295_cast_fp16_18, var_1374_cast_fp16))[name = tensor<string, []>("op_1413_cast_fp16")];
+            tensor<string, []> var_1415_equation_0 = const()[name = tensor<string, []>("op_1415_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1415_cast_fp16 = einsum(equation = var_1415_equation_0, values = (var_1295_cast_fp16_19, var_1375_cast_fp16))[name = tensor<string, []>("op_1415_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_45_cast_fp16 = concat(axis = var_1200, interleave = input_45_interleave_0, values = (var_1377_cast_fp16, var_1379_cast_fp16, var_1381_cast_fp16, var_1383_cast_fp16, var_1385_cast_fp16, var_1387_cast_fp16, var_1389_cast_fp16, var_1391_cast_fp16, var_1393_cast_fp16, var_1395_cast_fp16, var_1397_cast_fp16, var_1399_cast_fp16, var_1401_cast_fp16, var_1403_cast_fp16, var_1405_cast_fp16, var_1407_cast_fp16, var_1409_cast_fp16, var_1411_cast_fp16, var_1413_cast_fp16, var_1415_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1424_pad_type_0 = const()[name = tensor<string, []>("op_1424_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1424_strides_0 = const()[name = tensor<string, []>("op_1424_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1424_pad_0 = const()[name = tensor<string, []>("op_1424_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1424_dilations_0 = const()[name = tensor<string, []>("op_1424_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1424_groups_0 = const()[name = tensor<string, []>("op_1424_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(181913152)))];
+            tensor<fp16, [1280]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185190016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1424_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1424_dilations_0, groups = var_1424_groups_0, pad = var_1424_pad_0, pad_type = var_1424_pad_type_0, strides = var_1424_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1424_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1424_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185192640)))];
+            tensor<fp16, [1280]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185195264)))];
+            tensor<fp16, []> var_1434_to_fp16 = const()[name = tensor<string, []>("op_1434_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1434_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185197888)))];
+            tensor<fp16, [5120]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198305152)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1460_pad_type_0 = const()[name = tensor<string, []>("op_1460_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1460_strides_0 = const()[name = tensor<string, []>("op_1460_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1460_pad_0 = const()[name = tensor<string, []>("op_1460_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1460_dilations_0 = const()[name = tensor<string, []>("op_1460_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1460_groups_0 = const()[name = tensor<string, []>("op_1460_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198315456)))];
+            tensor<fp16, [1280]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211422720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1460_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1460_dilations_0, groups = var_1460_groups_0, pad = var_1460_pad_0, pad_type = var_1460_pad_type_0, strides = var_1460_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1460_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1469 = const()[name = tensor<string, []>("op_1469"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211425344)))];
+            tensor<fp16, [1280]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211427968)))];
+            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1485_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1520_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1520_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211430592)))];
+            tensor<fp16, [1280]> var_1520_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1520_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214707456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1520_cast_fp16 = conv(bias = var_1520_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1520_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1520_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214710080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1518_pad_type_0 = const()[name = tensor<string, []>("op_1518_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1518_strides_0 = const()[name = tensor<string, []>("op_1518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1518_pad_0 = const()[name = tensor<string, []>("op_1518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1518_dilations_0 = const()[name = tensor<string, []>("op_1518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1518_groups_0 = const()[name = tensor<string, []>("op_1518_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217986944)))];
+            tensor<fp16, [1280]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221263808)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1518_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1518_dilations_0, groups = var_1518_groups_0, pad = var_1518_pad_0, pad_type = var_1518_pad_type_0, strides = var_1518_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1518_cast_fp16")];
+            tensor<int32, [20]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1521_axis_0 = const()[name = tensor<string, []>("op_1521_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1521_cast_fp16_19 = split(axis = var_1521_axis_0, split_sizes = tile_15, x = var_1520_cast_fp16)[name = tensor<string, []>("op_1521_cast_fp16")];
+            tensor<int32, [4]> var_1542_perm_0 = const()[name = tensor<string, []>("op_1542_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1543_axis_0 = const()[name = tensor<string, []>("op_1543_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1542_cast_fp16 = transpose(perm = var_1542_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_27")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1543_cast_fp16_19 = split(axis = var_1543_axis_0, split_sizes = tile_16, x = var_1542_cast_fp16)[name = tensor<string, []>("op_1543_cast_fp16")];
+            tensor<int32, [20]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1564_axis_0 = const()[name = tensor<string, []>("op_1564_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1564_cast_fp16_19 = split(axis = var_1564_axis_0, split_sizes = tile_17, x = var_1518_cast_fp16)[name = tensor<string, []>("op_1564_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1543_cast_fp16_0, var_1521_cast_fp16_0))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1543_cast_fp16_1, var_1521_cast_fp16_1))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1543_cast_fp16_2, var_1521_cast_fp16_2))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1543_cast_fp16_3, var_1521_cast_fp16_3))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1543_cast_fp16_4, var_1521_cast_fp16_4))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1543_cast_fp16_5, var_1521_cast_fp16_5))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1543_cast_fp16_6, var_1521_cast_fp16_6))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1543_cast_fp16_7, var_1521_cast_fp16_7))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1543_cast_fp16_8, var_1521_cast_fp16_8))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1543_cast_fp16_9, var_1521_cast_fp16_9))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1543_cast_fp16_10, var_1521_cast_fp16_10))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1543_cast_fp16_11, var_1521_cast_fp16_11))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1543_cast_fp16_12, var_1521_cast_fp16_12))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1543_cast_fp16_13, var_1521_cast_fp16_13))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1543_cast_fp16_14, var_1521_cast_fp16_14))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1543_cast_fp16_15, var_1521_cast_fp16_15))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1543_cast_fp16_16, var_1521_cast_fp16_16))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1543_cast_fp16_17, var_1521_cast_fp16_17))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1543_cast_fp16_18, var_1521_cast_fp16_18))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1543_cast_fp16_19, var_1521_cast_fp16_19))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1625_cast_fp16 = softmax(axis = var_1469, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1626_cast_fp16 = softmax(axis = var_1469, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1627_cast_fp16 = softmax(axis = var_1469, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1628_cast_fp16 = softmax(axis = var_1469, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1629_cast_fp16 = softmax(axis = var_1469, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1630_cast_fp16 = softmax(axis = var_1469, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1631_cast_fp16 = softmax(axis = var_1469, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1632_cast_fp16 = softmax(axis = var_1469, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1633_cast_fp16 = softmax(axis = var_1469, x = aw_217_cast_fp16)[name = tensor<string, []>("op_1633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1634_cast_fp16 = softmax(axis = var_1469, x = aw_219_cast_fp16)[name = tensor<string, []>("op_1634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1635_cast_fp16 = softmax(axis = var_1469, x = aw_221_cast_fp16)[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1636_cast_fp16 = softmax(axis = var_1469, x = aw_223_cast_fp16)[name = tensor<string, []>("op_1636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1637_cast_fp16 = softmax(axis = var_1469, x = aw_225_cast_fp16)[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1638_cast_fp16 = softmax(axis = var_1469, x = aw_227_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1639_cast_fp16 = softmax(axis = var_1469, x = aw_229_cast_fp16)[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1640_cast_fp16 = softmax(axis = var_1469, x = aw_231_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1641_cast_fp16 = softmax(axis = var_1469, x = aw_233_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1642_cast_fp16 = softmax(axis = var_1469, x = aw_235_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1643_cast_fp16 = softmax(axis = var_1469, x = aw_237_cast_fp16)[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1644_cast_fp16 = softmax(axis = var_1469, x = aw_239_cast_fp16)[name = tensor<string, []>("op_1644_cast_fp16")];
+            tensor<string, []> var_1646_equation_0 = const()[name = tensor<string, []>("op_1646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1564_cast_fp16_0, var_1625_cast_fp16))[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1564_cast_fp16_1, var_1626_cast_fp16))[name = tensor<string, []>("op_1648_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1564_cast_fp16_2, var_1627_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1564_cast_fp16_3, var_1628_cast_fp16))[name = tensor<string, []>("op_1652_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1564_cast_fp16_4, var_1629_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1564_cast_fp16_5, var_1630_cast_fp16))[name = tensor<string, []>("op_1656_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1564_cast_fp16_6, var_1631_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1564_cast_fp16_7, var_1632_cast_fp16))[name = tensor<string, []>("op_1660_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1564_cast_fp16_8, var_1633_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1564_cast_fp16_9, var_1634_cast_fp16))[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1564_cast_fp16_10, var_1635_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1564_cast_fp16_11, var_1636_cast_fp16))[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1564_cast_fp16_12, var_1637_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<string, []> var_1672_equation_0 = const()[name = tensor<string, []>("op_1672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1672_cast_fp16 = einsum(equation = var_1672_equation_0, values = (var_1564_cast_fp16_13, var_1638_cast_fp16))[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<string, []> var_1674_equation_0 = const()[name = tensor<string, []>("op_1674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1564_cast_fp16_14, var_1639_cast_fp16))[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<string, []> var_1676_equation_0 = const()[name = tensor<string, []>("op_1676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1676_cast_fp16 = einsum(equation = var_1676_equation_0, values = (var_1564_cast_fp16_15, var_1640_cast_fp16))[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<string, []> var_1678_equation_0 = const()[name = tensor<string, []>("op_1678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1564_cast_fp16_16, var_1641_cast_fp16))[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<string, []> var_1680_equation_0 = const()[name = tensor<string, []>("op_1680_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1680_cast_fp16 = einsum(equation = var_1680_equation_0, values = (var_1564_cast_fp16_17, var_1642_cast_fp16))[name = tensor<string, []>("op_1680_cast_fp16")];
+            tensor<string, []> var_1682_equation_0 = const()[name = tensor<string, []>("op_1682_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1564_cast_fp16_18, var_1643_cast_fp16))[name = tensor<string, []>("op_1682_cast_fp16")];
+            tensor<string, []> var_1684_equation_0 = const()[name = tensor<string, []>("op_1684_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1684_cast_fp16 = einsum(equation = var_1684_equation_0, values = (var_1564_cast_fp16_19, var_1644_cast_fp16))[name = tensor<string, []>("op_1684_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1469, interleave = input_55_interleave_0, values = (var_1646_cast_fp16, var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16, var_1668_cast_fp16, var_1670_cast_fp16, var_1672_cast_fp16, var_1674_cast_fp16, var_1676_cast_fp16, var_1678_cast_fp16, var_1680_cast_fp16, var_1682_cast_fp16, var_1684_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1693_pad_type_0 = const()[name = tensor<string, []>("op_1693_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1693_strides_0 = const()[name = tensor<string, []>("op_1693_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1693_pad_0 = const()[name = tensor<string, []>("op_1693_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1693_dilations_0 = const()[name = tensor<string, []>("op_1693_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1693_groups_0 = const()[name = tensor<string, []>("op_1693_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221266432)))];
+            tensor<fp16, [1280]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224543296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1693_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1693_dilations_0, groups = var_1693_groups_0, pad = var_1693_pad_0, pad_type = var_1693_pad_type_0, strides = var_1693_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1693_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1693_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224545920)))];
+            tensor<fp16, [1280]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224548544)))];
+            tensor<fp16, []> var_1703_to_fp16 = const()[name = tensor<string, []>("op_1703_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1703_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(224551168)))];
+            tensor<fp16, [5120]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237658432)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1729_pad_type_0 = const()[name = tensor<string, []>("op_1729_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1729_strides_0 = const()[name = tensor<string, []>("op_1729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1729_pad_0 = const()[name = tensor<string, []>("op_1729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1729_dilations_0 = const()[name = tensor<string, []>("op_1729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1729_groups_0 = const()[name = tensor<string, []>("op_1729_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(237668736)))];
+            tensor<fp16, [1280]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250776000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1729_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1729_dilations_0, groups = var_1729_groups_0, pad = var_1729_pad_0, pad_type = var_1729_pad_type_0, strides = var_1729_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1729_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1729_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1738 = const()[name = tensor<string, []>("op_1738"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250778624)))];
+            tensor<fp16, [1280]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250781248)))];
+            tensor<fp16, []> var_1754_to_fp16 = const()[name = tensor<string, []>("op_1754_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1754_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_1789_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1789_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(250783872)))];
+            tensor<fp16, [1280]> var_1789_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1789_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254060736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1789_cast_fp16 = conv(bias = var_1789_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1789_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1789_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254063360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1787_pad_type_0 = const()[name = tensor<string, []>("op_1787_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1787_strides_0 = const()[name = tensor<string, []>("op_1787_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1787_pad_0 = const()[name = tensor<string, []>("op_1787_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1787_dilations_0 = const()[name = tensor<string, []>("op_1787_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1787_groups_0 = const()[name = tensor<string, []>("op_1787_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257340224)))];
+            tensor<fp16, [1280]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260617088)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1787_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1787_dilations_0, groups = var_1787_groups_0, pad = var_1787_pad_0, pad_type = var_1787_pad_type_0, strides = var_1787_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1787_cast_fp16")];
+            tensor<int32, [20]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1790_axis_0 = const()[name = tensor<string, []>("op_1790_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1790_cast_fp16_19 = split(axis = var_1790_axis_0, split_sizes = tile_18, x = var_1789_cast_fp16)[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<int32, [4]> var_1811_perm_0 = const()[name = tensor<string, []>("op_1811_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1812_axis_0 = const()[name = tensor<string, []>("op_1812_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_1811_cast_fp16 = transpose(perm = var_1811_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_26")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_1812_cast_fp16_19 = split(axis = var_1812_axis_0, split_sizes = tile_19, x = var_1811_cast_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<int32, [20]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1833_axis_0 = const()[name = tensor<string, []>("op_1833_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_1833_cast_fp16_19 = split(axis = var_1833_axis_0, split_sizes = tile_20, x = var_1787_cast_fp16)[name = tensor<string, []>("op_1833_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_1812_cast_fp16_0, var_1790_cast_fp16_0))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_1812_cast_fp16_1, var_1790_cast_fp16_1))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_1812_cast_fp16_2, var_1790_cast_fp16_2))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_1812_cast_fp16_3, var_1790_cast_fp16_3))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_1812_cast_fp16_4, var_1790_cast_fp16_4))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_1812_cast_fp16_5, var_1790_cast_fp16_5))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_1812_cast_fp16_6, var_1790_cast_fp16_6))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_1812_cast_fp16_7, var_1790_cast_fp16_7))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_1812_cast_fp16_8, var_1790_cast_fp16_8))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_1812_cast_fp16_9, var_1790_cast_fp16_9))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_1812_cast_fp16_10, var_1790_cast_fp16_10))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_1812_cast_fp16_11, var_1790_cast_fp16_11))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_1812_cast_fp16_12, var_1790_cast_fp16_12))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_1812_cast_fp16_13, var_1790_cast_fp16_13))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_1812_cast_fp16_14, var_1790_cast_fp16_14))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_1812_cast_fp16_15, var_1790_cast_fp16_15))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_1812_cast_fp16_16, var_1790_cast_fp16_16))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_1812_cast_fp16_17, var_1790_cast_fp16_17))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_1812_cast_fp16_18, var_1790_cast_fp16_18))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_1812_cast_fp16_19, var_1790_cast_fp16_19))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1894_cast_fp16 = softmax(axis = var_1738, x = aw_241_cast_fp16)[name = tensor<string, []>("op_1894_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1895_cast_fp16 = softmax(axis = var_1738, x = aw_243_cast_fp16)[name = tensor<string, []>("op_1895_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1896_cast_fp16 = softmax(axis = var_1738, x = aw_245_cast_fp16)[name = tensor<string, []>("op_1896_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1897_cast_fp16 = softmax(axis = var_1738, x = aw_247_cast_fp16)[name = tensor<string, []>("op_1897_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1898_cast_fp16 = softmax(axis = var_1738, x = aw_249_cast_fp16)[name = tensor<string, []>("op_1898_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1899_cast_fp16 = softmax(axis = var_1738, x = aw_251_cast_fp16)[name = tensor<string, []>("op_1899_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1900_cast_fp16 = softmax(axis = var_1738, x = aw_253_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1901_cast_fp16 = softmax(axis = var_1738, x = aw_255_cast_fp16)[name = tensor<string, []>("op_1901_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1902_cast_fp16 = softmax(axis = var_1738, x = aw_257_cast_fp16)[name = tensor<string, []>("op_1902_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1903_cast_fp16 = softmax(axis = var_1738, x = aw_259_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1904_cast_fp16 = softmax(axis = var_1738, x = aw_261_cast_fp16)[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1905_cast_fp16 = softmax(axis = var_1738, x = aw_263_cast_fp16)[name = tensor<string, []>("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1906_cast_fp16 = softmax(axis = var_1738, x = aw_265_cast_fp16)[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1907_cast_fp16 = softmax(axis = var_1738, x = aw_267_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1908_cast_fp16 = softmax(axis = var_1738, x = aw_269_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1909_cast_fp16 = softmax(axis = var_1738, x = aw_271_cast_fp16)[name = tensor<string, []>("op_1909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1910_cast_fp16 = softmax(axis = var_1738, x = aw_273_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1911_cast_fp16 = softmax(axis = var_1738, x = aw_275_cast_fp16)[name = tensor<string, []>("op_1911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1912_cast_fp16 = softmax(axis = var_1738, x = aw_277_cast_fp16)[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1913_cast_fp16 = softmax(axis = var_1738, x = aw_279_cast_fp16)[name = tensor<string, []>("op_1913_cast_fp16")];
+            tensor<string, []> var_1915_equation_0 = const()[name = tensor<string, []>("op_1915_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1915_cast_fp16 = einsum(equation = var_1915_equation_0, values = (var_1833_cast_fp16_0, var_1894_cast_fp16))[name = tensor<string, []>("op_1915_cast_fp16")];
+            tensor<string, []> var_1917_equation_0 = const()[name = tensor<string, []>("op_1917_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1917_cast_fp16 = einsum(equation = var_1917_equation_0, values = (var_1833_cast_fp16_1, var_1895_cast_fp16))[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<string, []> var_1919_equation_0 = const()[name = tensor<string, []>("op_1919_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1919_cast_fp16 = einsum(equation = var_1919_equation_0, values = (var_1833_cast_fp16_2, var_1896_cast_fp16))[name = tensor<string, []>("op_1919_cast_fp16")];
+            tensor<string, []> var_1921_equation_0 = const()[name = tensor<string, []>("op_1921_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1921_cast_fp16 = einsum(equation = var_1921_equation_0, values = (var_1833_cast_fp16_3, var_1897_cast_fp16))[name = tensor<string, []>("op_1921_cast_fp16")];
+            tensor<string, []> var_1923_equation_0 = const()[name = tensor<string, []>("op_1923_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1923_cast_fp16 = einsum(equation = var_1923_equation_0, values = (var_1833_cast_fp16_4, var_1898_cast_fp16))[name = tensor<string, []>("op_1923_cast_fp16")];
+            tensor<string, []> var_1925_equation_0 = const()[name = tensor<string, []>("op_1925_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1925_cast_fp16 = einsum(equation = var_1925_equation_0, values = (var_1833_cast_fp16_5, var_1899_cast_fp16))[name = tensor<string, []>("op_1925_cast_fp16")];
+            tensor<string, []> var_1927_equation_0 = const()[name = tensor<string, []>("op_1927_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1927_cast_fp16 = einsum(equation = var_1927_equation_0, values = (var_1833_cast_fp16_6, var_1900_cast_fp16))[name = tensor<string, []>("op_1927_cast_fp16")];
+            tensor<string, []> var_1929_equation_0 = const()[name = tensor<string, []>("op_1929_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1929_cast_fp16 = einsum(equation = var_1929_equation_0, values = (var_1833_cast_fp16_7, var_1901_cast_fp16))[name = tensor<string, []>("op_1929_cast_fp16")];
+            tensor<string, []> var_1931_equation_0 = const()[name = tensor<string, []>("op_1931_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1931_cast_fp16 = einsum(equation = var_1931_equation_0, values = (var_1833_cast_fp16_8, var_1902_cast_fp16))[name = tensor<string, []>("op_1931_cast_fp16")];
+            tensor<string, []> var_1933_equation_0 = const()[name = tensor<string, []>("op_1933_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1933_cast_fp16 = einsum(equation = var_1933_equation_0, values = (var_1833_cast_fp16_9, var_1903_cast_fp16))[name = tensor<string, []>("op_1933_cast_fp16")];
+            tensor<string, []> var_1935_equation_0 = const()[name = tensor<string, []>("op_1935_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1935_cast_fp16 = einsum(equation = var_1935_equation_0, values = (var_1833_cast_fp16_10, var_1904_cast_fp16))[name = tensor<string, []>("op_1935_cast_fp16")];
+            tensor<string, []> var_1937_equation_0 = const()[name = tensor<string, []>("op_1937_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1937_cast_fp16 = einsum(equation = var_1937_equation_0, values = (var_1833_cast_fp16_11, var_1905_cast_fp16))[name = tensor<string, []>("op_1937_cast_fp16")];
+            tensor<string, []> var_1939_equation_0 = const()[name = tensor<string, []>("op_1939_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1939_cast_fp16 = einsum(equation = var_1939_equation_0, values = (var_1833_cast_fp16_12, var_1906_cast_fp16))[name = tensor<string, []>("op_1939_cast_fp16")];
+            tensor<string, []> var_1941_equation_0 = const()[name = tensor<string, []>("op_1941_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1941_cast_fp16 = einsum(equation = var_1941_equation_0, values = (var_1833_cast_fp16_13, var_1907_cast_fp16))[name = tensor<string, []>("op_1941_cast_fp16")];
+            tensor<string, []> var_1943_equation_0 = const()[name = tensor<string, []>("op_1943_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1943_cast_fp16 = einsum(equation = var_1943_equation_0, values = (var_1833_cast_fp16_14, var_1908_cast_fp16))[name = tensor<string, []>("op_1943_cast_fp16")];
+            tensor<string, []> var_1945_equation_0 = const()[name = tensor<string, []>("op_1945_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1945_cast_fp16 = einsum(equation = var_1945_equation_0, values = (var_1833_cast_fp16_15, var_1909_cast_fp16))[name = tensor<string, []>("op_1945_cast_fp16")];
+            tensor<string, []> var_1947_equation_0 = const()[name = tensor<string, []>("op_1947_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1947_cast_fp16 = einsum(equation = var_1947_equation_0, values = (var_1833_cast_fp16_16, var_1910_cast_fp16))[name = tensor<string, []>("op_1947_cast_fp16")];
+            tensor<string, []> var_1949_equation_0 = const()[name = tensor<string, []>("op_1949_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1949_cast_fp16 = einsum(equation = var_1949_equation_0, values = (var_1833_cast_fp16_17, var_1911_cast_fp16))[name = tensor<string, []>("op_1949_cast_fp16")];
+            tensor<string, []> var_1951_equation_0 = const()[name = tensor<string, []>("op_1951_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1951_cast_fp16 = einsum(equation = var_1951_equation_0, values = (var_1833_cast_fp16_18, var_1912_cast_fp16))[name = tensor<string, []>("op_1951_cast_fp16")];
+            tensor<string, []> var_1953_equation_0 = const()[name = tensor<string, []>("op_1953_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1953_cast_fp16 = einsum(equation = var_1953_equation_0, values = (var_1833_cast_fp16_19, var_1913_cast_fp16))[name = tensor<string, []>("op_1953_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1738, interleave = input_65_interleave_0, values = (var_1915_cast_fp16, var_1917_cast_fp16, var_1919_cast_fp16, var_1921_cast_fp16, var_1923_cast_fp16, var_1925_cast_fp16, var_1927_cast_fp16, var_1929_cast_fp16, var_1931_cast_fp16, var_1933_cast_fp16, var_1935_cast_fp16, var_1937_cast_fp16, var_1939_cast_fp16, var_1941_cast_fp16, var_1943_cast_fp16, var_1945_cast_fp16, var_1947_cast_fp16, var_1949_cast_fp16, var_1951_cast_fp16, var_1953_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1962_pad_type_0 = const()[name = tensor<string, []>("op_1962_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1962_strides_0 = const()[name = tensor<string, []>("op_1962_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1962_pad_0 = const()[name = tensor<string, []>("op_1962_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1962_dilations_0 = const()[name = tensor<string, []>("op_1962_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1962_groups_0 = const()[name = tensor<string, []>("op_1962_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(260619712)))];
+            tensor<fp16, [1280]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263896576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1962_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1962_dilations_0, groups = var_1962_groups_0, pad = var_1962_pad_0, pad_type = var_1962_pad_type_0, strides = var_1962_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1962_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1962_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263899200)))];
+            tensor<fp16, [1280]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263901824)))];
+            tensor<fp16, []> var_1972_to_fp16 = const()[name = tensor<string, []>("op_1972_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1972_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263904448)))];
+            tensor<fp16, [5120]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(277011712)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1998_pad_type_0 = const()[name = tensor<string, []>("op_1998_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1998_strides_0 = const()[name = tensor<string, []>("op_1998_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1998_pad_0 = const()[name = tensor<string, []>("op_1998_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1998_dilations_0 = const()[name = tensor<string, []>("op_1998_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1998_groups_0 = const()[name = tensor<string, []>("op_1998_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(277022016)))];
+            tensor<fp16, [1280]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(290129280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1998_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1998_dilations_0, groups = var_1998_groups_0, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1998_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1998_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1998_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_2007 = const()[name = tensor<string, []>("op_2007"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(290131904)))];
+            tensor<fp16, [1280]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(290134528)))];
+            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_2023_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2058_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2058_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(290137152)))];
+            tensor<fp16, [1280]> var_2058_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2058_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293414016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2058_cast_fp16 = conv(bias = var_2058_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_2058_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2058_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293416640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_2056_pad_type_0 = const()[name = tensor<string, []>("op_2056_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2056_strides_0 = const()[name = tensor<string, []>("op_2056_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2056_pad_0 = const()[name = tensor<string, []>("op_2056_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2056_dilations_0 = const()[name = tensor<string, []>("op_2056_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2056_groups_0 = const()[name = tensor<string, []>("op_2056_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(296693504)))];
+            tensor<fp16, [1280]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299970368)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2056_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<int32, [20]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2059_axis_0 = const()[name = tensor<string, []>("op_2059_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2059_cast_fp16_19 = split(axis = var_2059_axis_0, split_sizes = tile_21, x = var_2058_cast_fp16)[name = tensor<string, []>("op_2059_cast_fp16")];
+            tensor<int32, [4]> var_2080_perm_0 = const()[name = tensor<string, []>("op_2080_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2081_axis_0 = const()[name = tensor<string, []>("op_2081_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2080_cast_fp16 = transpose(perm = var_2080_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_25")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2081_cast_fp16_19 = split(axis = var_2081_axis_0, split_sizes = tile_22, x = var_2080_cast_fp16)[name = tensor<string, []>("op_2081_cast_fp16")];
+            tensor<int32, [20]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2102_axis_0 = const()[name = tensor<string, []>("op_2102_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2102_cast_fp16_19 = split(axis = var_2102_axis_0, split_sizes = tile_23, x = var_2056_cast_fp16)[name = tensor<string, []>("op_2102_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2081_cast_fp16_0, var_2059_cast_fp16_0))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2081_cast_fp16_1, var_2059_cast_fp16_1))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2081_cast_fp16_2, var_2059_cast_fp16_2))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_287_equation_0 = const()[name = tensor<string, []>("aw_287_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_287_cast_fp16 = einsum(equation = aw_287_equation_0, values = (var_2081_cast_fp16_3, var_2059_cast_fp16_3))[name = tensor<string, []>("aw_287_cast_fp16")];
+            tensor<string, []> aw_289_equation_0 = const()[name = tensor<string, []>("aw_289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_289_cast_fp16 = einsum(equation = aw_289_equation_0, values = (var_2081_cast_fp16_4, var_2059_cast_fp16_4))[name = tensor<string, []>("aw_289_cast_fp16")];
+            tensor<string, []> aw_291_equation_0 = const()[name = tensor<string, []>("aw_291_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_291_cast_fp16 = einsum(equation = aw_291_equation_0, values = (var_2081_cast_fp16_5, var_2059_cast_fp16_5))[name = tensor<string, []>("aw_291_cast_fp16")];
+            tensor<string, []> aw_293_equation_0 = const()[name = tensor<string, []>("aw_293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_293_cast_fp16 = einsum(equation = aw_293_equation_0, values = (var_2081_cast_fp16_6, var_2059_cast_fp16_6))[name = tensor<string, []>("aw_293_cast_fp16")];
+            tensor<string, []> aw_295_equation_0 = const()[name = tensor<string, []>("aw_295_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_295_cast_fp16 = einsum(equation = aw_295_equation_0, values = (var_2081_cast_fp16_7, var_2059_cast_fp16_7))[name = tensor<string, []>("aw_295_cast_fp16")];
+            tensor<string, []> aw_297_equation_0 = const()[name = tensor<string, []>("aw_297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_297_cast_fp16 = einsum(equation = aw_297_equation_0, values = (var_2081_cast_fp16_8, var_2059_cast_fp16_8))[name = tensor<string, []>("aw_297_cast_fp16")];
+            tensor<string, []> aw_299_equation_0 = const()[name = tensor<string, []>("aw_299_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_299_cast_fp16 = einsum(equation = aw_299_equation_0, values = (var_2081_cast_fp16_9, var_2059_cast_fp16_9))[name = tensor<string, []>("aw_299_cast_fp16")];
+            tensor<string, []> aw_301_equation_0 = const()[name = tensor<string, []>("aw_301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_301_cast_fp16 = einsum(equation = aw_301_equation_0, values = (var_2081_cast_fp16_10, var_2059_cast_fp16_10))[name = tensor<string, []>("aw_301_cast_fp16")];
+            tensor<string, []> aw_303_equation_0 = const()[name = tensor<string, []>("aw_303_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_303_cast_fp16 = einsum(equation = aw_303_equation_0, values = (var_2081_cast_fp16_11, var_2059_cast_fp16_11))[name = tensor<string, []>("aw_303_cast_fp16")];
+            tensor<string, []> aw_305_equation_0 = const()[name = tensor<string, []>("aw_305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_305_cast_fp16 = einsum(equation = aw_305_equation_0, values = (var_2081_cast_fp16_12, var_2059_cast_fp16_12))[name = tensor<string, []>("aw_305_cast_fp16")];
+            tensor<string, []> aw_307_equation_0 = const()[name = tensor<string, []>("aw_307_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_307_cast_fp16 = einsum(equation = aw_307_equation_0, values = (var_2081_cast_fp16_13, var_2059_cast_fp16_13))[name = tensor<string, []>("aw_307_cast_fp16")];
+            tensor<string, []> aw_309_equation_0 = const()[name = tensor<string, []>("aw_309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_309_cast_fp16 = einsum(equation = aw_309_equation_0, values = (var_2081_cast_fp16_14, var_2059_cast_fp16_14))[name = tensor<string, []>("aw_309_cast_fp16")];
+            tensor<string, []> aw_311_equation_0 = const()[name = tensor<string, []>("aw_311_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_311_cast_fp16 = einsum(equation = aw_311_equation_0, values = (var_2081_cast_fp16_15, var_2059_cast_fp16_15))[name = tensor<string, []>("aw_311_cast_fp16")];
+            tensor<string, []> aw_313_equation_0 = const()[name = tensor<string, []>("aw_313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_313_cast_fp16 = einsum(equation = aw_313_equation_0, values = (var_2081_cast_fp16_16, var_2059_cast_fp16_16))[name = tensor<string, []>("aw_313_cast_fp16")];
+            tensor<string, []> aw_315_equation_0 = const()[name = tensor<string, []>("aw_315_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_315_cast_fp16 = einsum(equation = aw_315_equation_0, values = (var_2081_cast_fp16_17, var_2059_cast_fp16_17))[name = tensor<string, []>("aw_315_cast_fp16")];
+            tensor<string, []> aw_317_equation_0 = const()[name = tensor<string, []>("aw_317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_317_cast_fp16 = einsum(equation = aw_317_equation_0, values = (var_2081_cast_fp16_18, var_2059_cast_fp16_18))[name = tensor<string, []>("aw_317_cast_fp16")];
+            tensor<string, []> aw_319_equation_0 = const()[name = tensor<string, []>("aw_319_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_319_cast_fp16 = einsum(equation = aw_319_equation_0, values = (var_2081_cast_fp16_19, var_2059_cast_fp16_19))[name = tensor<string, []>("aw_319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2163_cast_fp16 = softmax(axis = var_2007, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2163_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2164_cast_fp16 = softmax(axis = var_2007, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2164_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2165_cast_fp16 = softmax(axis = var_2007, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2165_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2166_cast_fp16 = softmax(axis = var_2007, x = aw_287_cast_fp16)[name = tensor<string, []>("op_2166_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2167_cast_fp16 = softmax(axis = var_2007, x = aw_289_cast_fp16)[name = tensor<string, []>("op_2167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2168_cast_fp16 = softmax(axis = var_2007, x = aw_291_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2169_cast_fp16 = softmax(axis = var_2007, x = aw_293_cast_fp16)[name = tensor<string, []>("op_2169_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2170_cast_fp16 = softmax(axis = var_2007, x = aw_295_cast_fp16)[name = tensor<string, []>("op_2170_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2171_cast_fp16 = softmax(axis = var_2007, x = aw_297_cast_fp16)[name = tensor<string, []>("op_2171_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2172_cast_fp16 = softmax(axis = var_2007, x = aw_299_cast_fp16)[name = tensor<string, []>("op_2172_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2173_cast_fp16 = softmax(axis = var_2007, x = aw_301_cast_fp16)[name = tensor<string, []>("op_2173_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2174_cast_fp16 = softmax(axis = var_2007, x = aw_303_cast_fp16)[name = tensor<string, []>("op_2174_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2175_cast_fp16 = softmax(axis = var_2007, x = aw_305_cast_fp16)[name = tensor<string, []>("op_2175_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2176_cast_fp16 = softmax(axis = var_2007, x = aw_307_cast_fp16)[name = tensor<string, []>("op_2176_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2177_cast_fp16 = softmax(axis = var_2007, x = aw_309_cast_fp16)[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2178_cast_fp16 = softmax(axis = var_2007, x = aw_311_cast_fp16)[name = tensor<string, []>("op_2178_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2179_cast_fp16 = softmax(axis = var_2007, x = aw_313_cast_fp16)[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2180_cast_fp16 = softmax(axis = var_2007, x = aw_315_cast_fp16)[name = tensor<string, []>("op_2180_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2181_cast_fp16 = softmax(axis = var_2007, x = aw_317_cast_fp16)[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2182_cast_fp16 = softmax(axis = var_2007, x = aw_319_cast_fp16)[name = tensor<string, []>("op_2182_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2184_cast_fp16 = einsum(equation = var_2184_equation_0, values = (var_2102_cast_fp16_0, var_2163_cast_fp16))[name = tensor<string, []>("op_2184_cast_fp16")];
+            tensor<string, []> var_2186_equation_0 = const()[name = tensor<string, []>("op_2186_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16 = einsum(equation = var_2186_equation_0, values = (var_2102_cast_fp16_1, var_2164_cast_fp16))[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<string, []> var_2188_equation_0 = const()[name = tensor<string, []>("op_2188_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2188_cast_fp16 = einsum(equation = var_2188_equation_0, values = (var_2102_cast_fp16_2, var_2165_cast_fp16))[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<string, []> var_2190_equation_0 = const()[name = tensor<string, []>("op_2190_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2190_cast_fp16 = einsum(equation = var_2190_equation_0, values = (var_2102_cast_fp16_3, var_2166_cast_fp16))[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<string, []> var_2192_equation_0 = const()[name = tensor<string, []>("op_2192_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2192_cast_fp16 = einsum(equation = var_2192_equation_0, values = (var_2102_cast_fp16_4, var_2167_cast_fp16))[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<string, []> var_2194_equation_0 = const()[name = tensor<string, []>("op_2194_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2194_cast_fp16 = einsum(equation = var_2194_equation_0, values = (var_2102_cast_fp16_5, var_2168_cast_fp16))[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<string, []> var_2196_equation_0 = const()[name = tensor<string, []>("op_2196_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2196_cast_fp16 = einsum(equation = var_2196_equation_0, values = (var_2102_cast_fp16_6, var_2169_cast_fp16))[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<string, []> var_2198_equation_0 = const()[name = tensor<string, []>("op_2198_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2198_cast_fp16 = einsum(equation = var_2198_equation_0, values = (var_2102_cast_fp16_7, var_2170_cast_fp16))[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<string, []> var_2200_equation_0 = const()[name = tensor<string, []>("op_2200_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2200_cast_fp16 = einsum(equation = var_2200_equation_0, values = (var_2102_cast_fp16_8, var_2171_cast_fp16))[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<string, []> var_2202_equation_0 = const()[name = tensor<string, []>("op_2202_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2202_cast_fp16 = einsum(equation = var_2202_equation_0, values = (var_2102_cast_fp16_9, var_2172_cast_fp16))[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<string, []> var_2204_equation_0 = const()[name = tensor<string, []>("op_2204_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2204_cast_fp16 = einsum(equation = var_2204_equation_0, values = (var_2102_cast_fp16_10, var_2173_cast_fp16))[name = tensor<string, []>("op_2204_cast_fp16")];
+            tensor<string, []> var_2206_equation_0 = const()[name = tensor<string, []>("op_2206_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2206_cast_fp16 = einsum(equation = var_2206_equation_0, values = (var_2102_cast_fp16_11, var_2174_cast_fp16))[name = tensor<string, []>("op_2206_cast_fp16")];
+            tensor<string, []> var_2208_equation_0 = const()[name = tensor<string, []>("op_2208_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2208_cast_fp16 = einsum(equation = var_2208_equation_0, values = (var_2102_cast_fp16_12, var_2175_cast_fp16))[name = tensor<string, []>("op_2208_cast_fp16")];
+            tensor<string, []> var_2210_equation_0 = const()[name = tensor<string, []>("op_2210_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2210_cast_fp16 = einsum(equation = var_2210_equation_0, values = (var_2102_cast_fp16_13, var_2176_cast_fp16))[name = tensor<string, []>("op_2210_cast_fp16")];
+            tensor<string, []> var_2212_equation_0 = const()[name = tensor<string, []>("op_2212_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2102_cast_fp16_14, var_2177_cast_fp16))[name = tensor<string, []>("op_2212_cast_fp16")];
+            tensor<string, []> var_2214_equation_0 = const()[name = tensor<string, []>("op_2214_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2102_cast_fp16_15, var_2178_cast_fp16))[name = tensor<string, []>("op_2214_cast_fp16")];
+            tensor<string, []> var_2216_equation_0 = const()[name = tensor<string, []>("op_2216_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2102_cast_fp16_16, var_2179_cast_fp16))[name = tensor<string, []>("op_2216_cast_fp16")];
+            tensor<string, []> var_2218_equation_0 = const()[name = tensor<string, []>("op_2218_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2102_cast_fp16_17, var_2180_cast_fp16))[name = tensor<string, []>("op_2218_cast_fp16")];
+            tensor<string, []> var_2220_equation_0 = const()[name = tensor<string, []>("op_2220_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2102_cast_fp16_18, var_2181_cast_fp16))[name = tensor<string, []>("op_2220_cast_fp16")];
+            tensor<string, []> var_2222_equation_0 = const()[name = tensor<string, []>("op_2222_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2102_cast_fp16_19, var_2182_cast_fp16))[name = tensor<string, []>("op_2222_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_75_cast_fp16 = concat(axis = var_2007, interleave = input_75_interleave_0, values = (var_2184_cast_fp16, var_2186_cast_fp16, var_2188_cast_fp16, var_2190_cast_fp16, var_2192_cast_fp16, var_2194_cast_fp16, var_2196_cast_fp16, var_2198_cast_fp16, var_2200_cast_fp16, var_2202_cast_fp16, var_2204_cast_fp16, var_2206_cast_fp16, var_2208_cast_fp16, var_2210_cast_fp16, var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_2231_pad_type_0 = const()[name = tensor<string, []>("op_2231_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2231_strides_0 = const()[name = tensor<string, []>("op_2231_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2231_pad_0 = const()[name = tensor<string, []>("op_2231_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2231_dilations_0 = const()[name = tensor<string, []>("op_2231_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2231_groups_0 = const()[name = tensor<string, []>("op_2231_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(299972992)))];
+            tensor<fp16, [1280]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303249856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2231_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_2231_dilations_0, groups = var_2231_groups_0, pad = var_2231_pad_0, pad_type = var_2231_pad_type_0, strides = var_2231_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_2231_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_2231_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303252480)))];
+            tensor<fp16, [1280]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303255104)))];
+            tensor<fp16, []> var_2241_to_fp16 = const()[name = tensor<string, []>("op_2241_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_2241_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303257728)))];
+            tensor<fp16, [5120]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316364992)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_2267_pad_type_0 = const()[name = tensor<string, []>("op_2267_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2267_strides_0 = const()[name = tensor<string, []>("op_2267_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2267_pad_0 = const()[name = tensor<string, []>("op_2267_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2267_dilations_0 = const()[name = tensor<string, []>("op_2267_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2267_groups_0 = const()[name = tensor<string, []>("op_2267_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316375296)))];
+            tensor<fp16, [1280]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329482560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2267_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_2267_dilations_0, groups = var_2267_groups_0, pad = var_2267_pad_0, pad_type = var_2267_pad_type_0, strides = var_2267_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_2267_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_2267_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_2276 = const()[name = tensor<string, []>("op_2276"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329485184)))];
+            tensor<fp16, [1280]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329487808)))];
+            tensor<fp16, []> var_2292_to_fp16 = const()[name = tensor<string, []>("op_2292_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_2292_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2327_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2327_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(329490432)))];
+            tensor<fp16, [1280]> var_2327_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2327_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332767296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2327_cast_fp16 = conv(bias = var_2327_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_2327_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2327_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(332769920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_2325_pad_type_0 = const()[name = tensor<string, []>("op_2325_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2325_strides_0 = const()[name = tensor<string, []>("op_2325_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2325_pad_0 = const()[name = tensor<string, []>("op_2325_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2325_dilations_0 = const()[name = tensor<string, []>("op_2325_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2325_groups_0 = const()[name = tensor<string, []>("op_2325_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(336046784)))];
+            tensor<fp16, [1280]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339323648)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2325_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_2325_dilations_0, groups = var_2325_groups_0, pad = var_2325_pad_0, pad_type = var_2325_pad_type_0, strides = var_2325_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2325_cast_fp16")];
+            tensor<int32, [20]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2328_axis_0 = const()[name = tensor<string, []>("op_2328_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_19 = split(axis = var_2328_axis_0, split_sizes = tile_24, x = var_2327_cast_fp16)[name = tensor<string, []>("op_2328_cast_fp16")];
+            tensor<int32, [4]> var_2349_perm_0 = const()[name = tensor<string, []>("op_2349_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2350_axis_0 = const()[name = tensor<string, []>("op_2350_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2349_cast_fp16 = transpose(perm = var_2349_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_24")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2350_cast_fp16_19 = split(axis = var_2350_axis_0, split_sizes = tile_25, x = var_2349_cast_fp16)[name = tensor<string, []>("op_2350_cast_fp16")];
+            tensor<int32, [20]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2371_axis_0 = const()[name = tensor<string, []>("op_2371_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2371_cast_fp16_19 = split(axis = var_2371_axis_0, split_sizes = tile_26, x = var_2325_cast_fp16)[name = tensor<string, []>("op_2371_cast_fp16")];
+            tensor<string, []> aw_321_equation_0 = const()[name = tensor<string, []>("aw_321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_321_cast_fp16 = einsum(equation = aw_321_equation_0, values = (var_2350_cast_fp16_0, var_2328_cast_fp16_0))[name = tensor<string, []>("aw_321_cast_fp16")];
+            tensor<string, []> aw_323_equation_0 = const()[name = tensor<string, []>("aw_323_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_323_cast_fp16 = einsum(equation = aw_323_equation_0, values = (var_2350_cast_fp16_1, var_2328_cast_fp16_1))[name = tensor<string, []>("aw_323_cast_fp16")];
+            tensor<string, []> aw_325_equation_0 = const()[name = tensor<string, []>("aw_325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_325_cast_fp16 = einsum(equation = aw_325_equation_0, values = (var_2350_cast_fp16_2, var_2328_cast_fp16_2))[name = tensor<string, []>("aw_325_cast_fp16")];
+            tensor<string, []> aw_327_equation_0 = const()[name = tensor<string, []>("aw_327_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_327_cast_fp16 = einsum(equation = aw_327_equation_0, values = (var_2350_cast_fp16_3, var_2328_cast_fp16_3))[name = tensor<string, []>("aw_327_cast_fp16")];
+            tensor<string, []> aw_329_equation_0 = const()[name = tensor<string, []>("aw_329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_329_cast_fp16 = einsum(equation = aw_329_equation_0, values = (var_2350_cast_fp16_4, var_2328_cast_fp16_4))[name = tensor<string, []>("aw_329_cast_fp16")];
+            tensor<string, []> aw_331_equation_0 = const()[name = tensor<string, []>("aw_331_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_331_cast_fp16 = einsum(equation = aw_331_equation_0, values = (var_2350_cast_fp16_5, var_2328_cast_fp16_5))[name = tensor<string, []>("aw_331_cast_fp16")];
+            tensor<string, []> aw_333_equation_0 = const()[name = tensor<string, []>("aw_333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_333_cast_fp16 = einsum(equation = aw_333_equation_0, values = (var_2350_cast_fp16_6, var_2328_cast_fp16_6))[name = tensor<string, []>("aw_333_cast_fp16")];
+            tensor<string, []> aw_335_equation_0 = const()[name = tensor<string, []>("aw_335_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_335_cast_fp16 = einsum(equation = aw_335_equation_0, values = (var_2350_cast_fp16_7, var_2328_cast_fp16_7))[name = tensor<string, []>("aw_335_cast_fp16")];
+            tensor<string, []> aw_337_equation_0 = const()[name = tensor<string, []>("aw_337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_337_cast_fp16 = einsum(equation = aw_337_equation_0, values = (var_2350_cast_fp16_8, var_2328_cast_fp16_8))[name = tensor<string, []>("aw_337_cast_fp16")];
+            tensor<string, []> aw_339_equation_0 = const()[name = tensor<string, []>("aw_339_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_339_cast_fp16 = einsum(equation = aw_339_equation_0, values = (var_2350_cast_fp16_9, var_2328_cast_fp16_9))[name = tensor<string, []>("aw_339_cast_fp16")];
+            tensor<string, []> aw_341_equation_0 = const()[name = tensor<string, []>("aw_341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_341_cast_fp16 = einsum(equation = aw_341_equation_0, values = (var_2350_cast_fp16_10, var_2328_cast_fp16_10))[name = tensor<string, []>("aw_341_cast_fp16")];
+            tensor<string, []> aw_343_equation_0 = const()[name = tensor<string, []>("aw_343_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_343_cast_fp16 = einsum(equation = aw_343_equation_0, values = (var_2350_cast_fp16_11, var_2328_cast_fp16_11))[name = tensor<string, []>("aw_343_cast_fp16")];
+            tensor<string, []> aw_345_equation_0 = const()[name = tensor<string, []>("aw_345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_345_cast_fp16 = einsum(equation = aw_345_equation_0, values = (var_2350_cast_fp16_12, var_2328_cast_fp16_12))[name = tensor<string, []>("aw_345_cast_fp16")];
+            tensor<string, []> aw_347_equation_0 = const()[name = tensor<string, []>("aw_347_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_347_cast_fp16 = einsum(equation = aw_347_equation_0, values = (var_2350_cast_fp16_13, var_2328_cast_fp16_13))[name = tensor<string, []>("aw_347_cast_fp16")];
+            tensor<string, []> aw_349_equation_0 = const()[name = tensor<string, []>("aw_349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_349_cast_fp16 = einsum(equation = aw_349_equation_0, values = (var_2350_cast_fp16_14, var_2328_cast_fp16_14))[name = tensor<string, []>("aw_349_cast_fp16")];
+            tensor<string, []> aw_351_equation_0 = const()[name = tensor<string, []>("aw_351_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_351_cast_fp16 = einsum(equation = aw_351_equation_0, values = (var_2350_cast_fp16_15, var_2328_cast_fp16_15))[name = tensor<string, []>("aw_351_cast_fp16")];
+            tensor<string, []> aw_353_equation_0 = const()[name = tensor<string, []>("aw_353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_353_cast_fp16 = einsum(equation = aw_353_equation_0, values = (var_2350_cast_fp16_16, var_2328_cast_fp16_16))[name = tensor<string, []>("aw_353_cast_fp16")];
+            tensor<string, []> aw_355_equation_0 = const()[name = tensor<string, []>("aw_355_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_355_cast_fp16 = einsum(equation = aw_355_equation_0, values = (var_2350_cast_fp16_17, var_2328_cast_fp16_17))[name = tensor<string, []>("aw_355_cast_fp16")];
+            tensor<string, []> aw_357_equation_0 = const()[name = tensor<string, []>("aw_357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_357_cast_fp16 = einsum(equation = aw_357_equation_0, values = (var_2350_cast_fp16_18, var_2328_cast_fp16_18))[name = tensor<string, []>("aw_357_cast_fp16")];
+            tensor<string, []> aw_359_equation_0 = const()[name = tensor<string, []>("aw_359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_359_cast_fp16 = einsum(equation = aw_359_equation_0, values = (var_2350_cast_fp16_19, var_2328_cast_fp16_19))[name = tensor<string, []>("aw_359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2432_cast_fp16 = softmax(axis = var_2276, x = aw_321_cast_fp16)[name = tensor<string, []>("op_2432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2433_cast_fp16 = softmax(axis = var_2276, x = aw_323_cast_fp16)[name = tensor<string, []>("op_2433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2434_cast_fp16 = softmax(axis = var_2276, x = aw_325_cast_fp16)[name = tensor<string, []>("op_2434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2435_cast_fp16 = softmax(axis = var_2276, x = aw_327_cast_fp16)[name = tensor<string, []>("op_2435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2436_cast_fp16 = softmax(axis = var_2276, x = aw_329_cast_fp16)[name = tensor<string, []>("op_2436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2437_cast_fp16 = softmax(axis = var_2276, x = aw_331_cast_fp16)[name = tensor<string, []>("op_2437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2438_cast_fp16 = softmax(axis = var_2276, x = aw_333_cast_fp16)[name = tensor<string, []>("op_2438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2439_cast_fp16 = softmax(axis = var_2276, x = aw_335_cast_fp16)[name = tensor<string, []>("op_2439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2440_cast_fp16 = softmax(axis = var_2276, x = aw_337_cast_fp16)[name = tensor<string, []>("op_2440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2441_cast_fp16 = softmax(axis = var_2276, x = aw_339_cast_fp16)[name = tensor<string, []>("op_2441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2442_cast_fp16 = softmax(axis = var_2276, x = aw_341_cast_fp16)[name = tensor<string, []>("op_2442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2443_cast_fp16 = softmax(axis = var_2276, x = aw_343_cast_fp16)[name = tensor<string, []>("op_2443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2444_cast_fp16 = softmax(axis = var_2276, x = aw_345_cast_fp16)[name = tensor<string, []>("op_2444_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2445_cast_fp16 = softmax(axis = var_2276, x = aw_347_cast_fp16)[name = tensor<string, []>("op_2445_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2446_cast_fp16 = softmax(axis = var_2276, x = aw_349_cast_fp16)[name = tensor<string, []>("op_2446_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2447_cast_fp16 = softmax(axis = var_2276, x = aw_351_cast_fp16)[name = tensor<string, []>("op_2447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2448_cast_fp16 = softmax(axis = var_2276, x = aw_353_cast_fp16)[name = tensor<string, []>("op_2448_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2449_cast_fp16 = softmax(axis = var_2276, x = aw_355_cast_fp16)[name = tensor<string, []>("op_2449_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2450_cast_fp16 = softmax(axis = var_2276, x = aw_357_cast_fp16)[name = tensor<string, []>("op_2450_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2451_cast_fp16 = softmax(axis = var_2276, x = aw_359_cast_fp16)[name = tensor<string, []>("op_2451_cast_fp16")];
+            tensor<string, []> var_2453_equation_0 = const()[name = tensor<string, []>("op_2453_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2453_cast_fp16 = einsum(equation = var_2453_equation_0, values = (var_2371_cast_fp16_0, var_2432_cast_fp16))[name = tensor<string, []>("op_2453_cast_fp16")];
+            tensor<string, []> var_2455_equation_0 = const()[name = tensor<string, []>("op_2455_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2455_cast_fp16 = einsum(equation = var_2455_equation_0, values = (var_2371_cast_fp16_1, var_2433_cast_fp16))[name = tensor<string, []>("op_2455_cast_fp16")];
+            tensor<string, []> var_2457_equation_0 = const()[name = tensor<string, []>("op_2457_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2457_cast_fp16 = einsum(equation = var_2457_equation_0, values = (var_2371_cast_fp16_2, var_2434_cast_fp16))[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<string, []> var_2459_equation_0 = const()[name = tensor<string, []>("op_2459_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2459_cast_fp16 = einsum(equation = var_2459_equation_0, values = (var_2371_cast_fp16_3, var_2435_cast_fp16))[name = tensor<string, []>("op_2459_cast_fp16")];
+            tensor<string, []> var_2461_equation_0 = const()[name = tensor<string, []>("op_2461_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2461_cast_fp16 = einsum(equation = var_2461_equation_0, values = (var_2371_cast_fp16_4, var_2436_cast_fp16))[name = tensor<string, []>("op_2461_cast_fp16")];
+            tensor<string, []> var_2463_equation_0 = const()[name = tensor<string, []>("op_2463_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2463_cast_fp16 = einsum(equation = var_2463_equation_0, values = (var_2371_cast_fp16_5, var_2437_cast_fp16))[name = tensor<string, []>("op_2463_cast_fp16")];
+            tensor<string, []> var_2465_equation_0 = const()[name = tensor<string, []>("op_2465_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2465_cast_fp16 = einsum(equation = var_2465_equation_0, values = (var_2371_cast_fp16_6, var_2438_cast_fp16))[name = tensor<string, []>("op_2465_cast_fp16")];
+            tensor<string, []> var_2467_equation_0 = const()[name = tensor<string, []>("op_2467_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2467_cast_fp16 = einsum(equation = var_2467_equation_0, values = (var_2371_cast_fp16_7, var_2439_cast_fp16))[name = tensor<string, []>("op_2467_cast_fp16")];
+            tensor<string, []> var_2469_equation_0 = const()[name = tensor<string, []>("op_2469_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2469_cast_fp16 = einsum(equation = var_2469_equation_0, values = (var_2371_cast_fp16_8, var_2440_cast_fp16))[name = tensor<string, []>("op_2469_cast_fp16")];
+            tensor<string, []> var_2471_equation_0 = const()[name = tensor<string, []>("op_2471_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2471_cast_fp16 = einsum(equation = var_2471_equation_0, values = (var_2371_cast_fp16_9, var_2441_cast_fp16))[name = tensor<string, []>("op_2471_cast_fp16")];
+            tensor<string, []> var_2473_equation_0 = const()[name = tensor<string, []>("op_2473_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2473_cast_fp16 = einsum(equation = var_2473_equation_0, values = (var_2371_cast_fp16_10, var_2442_cast_fp16))[name = tensor<string, []>("op_2473_cast_fp16")];
+            tensor<string, []> var_2475_equation_0 = const()[name = tensor<string, []>("op_2475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2475_cast_fp16 = einsum(equation = var_2475_equation_0, values = (var_2371_cast_fp16_11, var_2443_cast_fp16))[name = tensor<string, []>("op_2475_cast_fp16")];
+            tensor<string, []> var_2477_equation_0 = const()[name = tensor<string, []>("op_2477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2477_cast_fp16 = einsum(equation = var_2477_equation_0, values = (var_2371_cast_fp16_12, var_2444_cast_fp16))[name = tensor<string, []>("op_2477_cast_fp16")];
+            tensor<string, []> var_2479_equation_0 = const()[name = tensor<string, []>("op_2479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2479_cast_fp16 = einsum(equation = var_2479_equation_0, values = (var_2371_cast_fp16_13, var_2445_cast_fp16))[name = tensor<string, []>("op_2479_cast_fp16")];
+            tensor<string, []> var_2481_equation_0 = const()[name = tensor<string, []>("op_2481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2481_cast_fp16 = einsum(equation = var_2481_equation_0, values = (var_2371_cast_fp16_14, var_2446_cast_fp16))[name = tensor<string, []>("op_2481_cast_fp16")];
+            tensor<string, []> var_2483_equation_0 = const()[name = tensor<string, []>("op_2483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2483_cast_fp16 = einsum(equation = var_2483_equation_0, values = (var_2371_cast_fp16_15, var_2447_cast_fp16))[name = tensor<string, []>("op_2483_cast_fp16")];
+            tensor<string, []> var_2485_equation_0 = const()[name = tensor<string, []>("op_2485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2485_cast_fp16 = einsum(equation = var_2485_equation_0, values = (var_2371_cast_fp16_16, var_2448_cast_fp16))[name = tensor<string, []>("op_2485_cast_fp16")];
+            tensor<string, []> var_2487_equation_0 = const()[name = tensor<string, []>("op_2487_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2487_cast_fp16 = einsum(equation = var_2487_equation_0, values = (var_2371_cast_fp16_17, var_2449_cast_fp16))[name = tensor<string, []>("op_2487_cast_fp16")];
+            tensor<string, []> var_2489_equation_0 = const()[name = tensor<string, []>("op_2489_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2489_cast_fp16 = einsum(equation = var_2489_equation_0, values = (var_2371_cast_fp16_18, var_2450_cast_fp16))[name = tensor<string, []>("op_2489_cast_fp16")];
+            tensor<string, []> var_2491_equation_0 = const()[name = tensor<string, []>("op_2491_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2491_cast_fp16 = einsum(equation = var_2491_equation_0, values = (var_2371_cast_fp16_19, var_2451_cast_fp16))[name = tensor<string, []>("op_2491_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_85_cast_fp16 = concat(axis = var_2276, interleave = input_85_interleave_0, values = (var_2453_cast_fp16, var_2455_cast_fp16, var_2457_cast_fp16, var_2459_cast_fp16, var_2461_cast_fp16, var_2463_cast_fp16, var_2465_cast_fp16, var_2467_cast_fp16, var_2469_cast_fp16, var_2471_cast_fp16, var_2473_cast_fp16, var_2475_cast_fp16, var_2477_cast_fp16, var_2479_cast_fp16, var_2481_cast_fp16, var_2483_cast_fp16, var_2485_cast_fp16, var_2487_cast_fp16, var_2489_cast_fp16, var_2491_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_2500_pad_type_0 = const()[name = tensor<string, []>("op_2500_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2500_strides_0 = const()[name = tensor<string, []>("op_2500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2500_pad_0 = const()[name = tensor<string, []>("op_2500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2500_dilations_0 = const()[name = tensor<string, []>("op_2500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2500_groups_0 = const()[name = tensor<string, []>("op_2500_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339326272)))];
+            tensor<fp16, [1280]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342603136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2500_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_2500_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_2500_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342605760)))];
+            tensor<fp16, [1280]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342608384)))];
+            tensor<fp16, []> var_2510_to_fp16 = const()[name = tensor<string, []>("op_2510_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_2510_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(342611008)))];
+            tensor<fp16, [5120]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355718272)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_2536_pad_type_0 = const()[name = tensor<string, []>("op_2536_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2536_strides_0 = const()[name = tensor<string, []>("op_2536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2536_pad_0 = const()[name = tensor<string, []>("op_2536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2536_dilations_0 = const()[name = tensor<string, []>("op_2536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2536_groups_0 = const()[name = tensor<string, []>("op_2536_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(355728576)))];
+            tensor<fp16, [1280]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368835840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2536_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_2536_dilations_0, groups = var_2536_groups_0, pad = var_2536_pad_0, pad_type = var_2536_pad_type_0, strides = var_2536_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_2536_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_2536_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_2545 = const()[name = tensor<string, []>("op_2545"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368838464)))];
+            tensor<fp16, [1280]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368841088)))];
+            tensor<fp16, []> var_2561_to_fp16 = const()[name = tensor<string, []>("op_2561_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_2561_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2596_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2596_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368843712)))];
+            tensor<fp16, [1280]> var_2596_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2596_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(372120576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2596_cast_fp16 = conv(bias = var_2596_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_2596_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2596_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(372123200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_2594_pad_type_0 = const()[name = tensor<string, []>("op_2594_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2594_strides_0 = const()[name = tensor<string, []>("op_2594_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2594_pad_0 = const()[name = tensor<string, []>("op_2594_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2594_dilations_0 = const()[name = tensor<string, []>("op_2594_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2594_groups_0 = const()[name = tensor<string, []>("op_2594_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(375400064)))];
+            tensor<fp16, [1280]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378676928)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2594_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_2594_dilations_0, groups = var_2594_groups_0, pad = var_2594_pad_0, pad_type = var_2594_pad_type_0, strides = var_2594_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2594_cast_fp16")];
+            tensor<int32, [20]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2597_axis_0 = const()[name = tensor<string, []>("op_2597_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2597_cast_fp16_19 = split(axis = var_2597_axis_0, split_sizes = tile_27, x = var_2596_cast_fp16)[name = tensor<string, []>("op_2597_cast_fp16")];
+            tensor<int32, [4]> var_2618_perm_0 = const()[name = tensor<string, []>("op_2618_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2619_axis_0 = const()[name = tensor<string, []>("op_2619_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2618_cast_fp16 = transpose(perm = var_2618_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_23")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2619_cast_fp16_19 = split(axis = var_2619_axis_0, split_sizes = tile_28, x = var_2618_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<int32, [20]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2640_axis_0 = const()[name = tensor<string, []>("op_2640_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2640_cast_fp16_19 = split(axis = var_2640_axis_0, split_sizes = tile_29, x = var_2594_cast_fp16)[name = tensor<string, []>("op_2640_cast_fp16")];
+            tensor<string, []> aw_361_equation_0 = const()[name = tensor<string, []>("aw_361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_361_cast_fp16 = einsum(equation = aw_361_equation_0, values = (var_2619_cast_fp16_0, var_2597_cast_fp16_0))[name = tensor<string, []>("aw_361_cast_fp16")];
+            tensor<string, []> aw_363_equation_0 = const()[name = tensor<string, []>("aw_363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_363_cast_fp16 = einsum(equation = aw_363_equation_0, values = (var_2619_cast_fp16_1, var_2597_cast_fp16_1))[name = tensor<string, []>("aw_363_cast_fp16")];
+            tensor<string, []> aw_365_equation_0 = const()[name = tensor<string, []>("aw_365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_365_cast_fp16 = einsum(equation = aw_365_equation_0, values = (var_2619_cast_fp16_2, var_2597_cast_fp16_2))[name = tensor<string, []>("aw_365_cast_fp16")];
+            tensor<string, []> aw_367_equation_0 = const()[name = tensor<string, []>("aw_367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_367_cast_fp16 = einsum(equation = aw_367_equation_0, values = (var_2619_cast_fp16_3, var_2597_cast_fp16_3))[name = tensor<string, []>("aw_367_cast_fp16")];
+            tensor<string, []> aw_369_equation_0 = const()[name = tensor<string, []>("aw_369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_369_cast_fp16 = einsum(equation = aw_369_equation_0, values = (var_2619_cast_fp16_4, var_2597_cast_fp16_4))[name = tensor<string, []>("aw_369_cast_fp16")];
+            tensor<string, []> aw_371_equation_0 = const()[name = tensor<string, []>("aw_371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_371_cast_fp16 = einsum(equation = aw_371_equation_0, values = (var_2619_cast_fp16_5, var_2597_cast_fp16_5))[name = tensor<string, []>("aw_371_cast_fp16")];
+            tensor<string, []> aw_373_equation_0 = const()[name = tensor<string, []>("aw_373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_373_cast_fp16 = einsum(equation = aw_373_equation_0, values = (var_2619_cast_fp16_6, var_2597_cast_fp16_6))[name = tensor<string, []>("aw_373_cast_fp16")];
+            tensor<string, []> aw_375_equation_0 = const()[name = tensor<string, []>("aw_375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_375_cast_fp16 = einsum(equation = aw_375_equation_0, values = (var_2619_cast_fp16_7, var_2597_cast_fp16_7))[name = tensor<string, []>("aw_375_cast_fp16")];
+            tensor<string, []> aw_377_equation_0 = const()[name = tensor<string, []>("aw_377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_377_cast_fp16 = einsum(equation = aw_377_equation_0, values = (var_2619_cast_fp16_8, var_2597_cast_fp16_8))[name = tensor<string, []>("aw_377_cast_fp16")];
+            tensor<string, []> aw_379_equation_0 = const()[name = tensor<string, []>("aw_379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_379_cast_fp16 = einsum(equation = aw_379_equation_0, values = (var_2619_cast_fp16_9, var_2597_cast_fp16_9))[name = tensor<string, []>("aw_379_cast_fp16")];
+            tensor<string, []> aw_381_equation_0 = const()[name = tensor<string, []>("aw_381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_381_cast_fp16 = einsum(equation = aw_381_equation_0, values = (var_2619_cast_fp16_10, var_2597_cast_fp16_10))[name = tensor<string, []>("aw_381_cast_fp16")];
+            tensor<string, []> aw_383_equation_0 = const()[name = tensor<string, []>("aw_383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_383_cast_fp16 = einsum(equation = aw_383_equation_0, values = (var_2619_cast_fp16_11, var_2597_cast_fp16_11))[name = tensor<string, []>("aw_383_cast_fp16")];
+            tensor<string, []> aw_385_equation_0 = const()[name = tensor<string, []>("aw_385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_385_cast_fp16 = einsum(equation = aw_385_equation_0, values = (var_2619_cast_fp16_12, var_2597_cast_fp16_12))[name = tensor<string, []>("aw_385_cast_fp16")];
+            tensor<string, []> aw_387_equation_0 = const()[name = tensor<string, []>("aw_387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_387_cast_fp16 = einsum(equation = aw_387_equation_0, values = (var_2619_cast_fp16_13, var_2597_cast_fp16_13))[name = tensor<string, []>("aw_387_cast_fp16")];
+            tensor<string, []> aw_389_equation_0 = const()[name = tensor<string, []>("aw_389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_389_cast_fp16 = einsum(equation = aw_389_equation_0, values = (var_2619_cast_fp16_14, var_2597_cast_fp16_14))[name = tensor<string, []>("aw_389_cast_fp16")];
+            tensor<string, []> aw_391_equation_0 = const()[name = tensor<string, []>("aw_391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_391_cast_fp16 = einsum(equation = aw_391_equation_0, values = (var_2619_cast_fp16_15, var_2597_cast_fp16_15))[name = tensor<string, []>("aw_391_cast_fp16")];
+            tensor<string, []> aw_393_equation_0 = const()[name = tensor<string, []>("aw_393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_393_cast_fp16 = einsum(equation = aw_393_equation_0, values = (var_2619_cast_fp16_16, var_2597_cast_fp16_16))[name = tensor<string, []>("aw_393_cast_fp16")];
+            tensor<string, []> aw_395_equation_0 = const()[name = tensor<string, []>("aw_395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_395_cast_fp16 = einsum(equation = aw_395_equation_0, values = (var_2619_cast_fp16_17, var_2597_cast_fp16_17))[name = tensor<string, []>("aw_395_cast_fp16")];
+            tensor<string, []> aw_397_equation_0 = const()[name = tensor<string, []>("aw_397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_397_cast_fp16 = einsum(equation = aw_397_equation_0, values = (var_2619_cast_fp16_18, var_2597_cast_fp16_18))[name = tensor<string, []>("aw_397_cast_fp16")];
+            tensor<string, []> aw_399_equation_0 = const()[name = tensor<string, []>("aw_399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_399_cast_fp16 = einsum(equation = aw_399_equation_0, values = (var_2619_cast_fp16_19, var_2597_cast_fp16_19))[name = tensor<string, []>("aw_399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2701_cast_fp16 = softmax(axis = var_2545, x = aw_361_cast_fp16)[name = tensor<string, []>("op_2701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2702_cast_fp16 = softmax(axis = var_2545, x = aw_363_cast_fp16)[name = tensor<string, []>("op_2702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2703_cast_fp16 = softmax(axis = var_2545, x = aw_365_cast_fp16)[name = tensor<string, []>("op_2703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2704_cast_fp16 = softmax(axis = var_2545, x = aw_367_cast_fp16)[name = tensor<string, []>("op_2704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2705_cast_fp16 = softmax(axis = var_2545, x = aw_369_cast_fp16)[name = tensor<string, []>("op_2705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2706_cast_fp16 = softmax(axis = var_2545, x = aw_371_cast_fp16)[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2707_cast_fp16 = softmax(axis = var_2545, x = aw_373_cast_fp16)[name = tensor<string, []>("op_2707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2708_cast_fp16 = softmax(axis = var_2545, x = aw_375_cast_fp16)[name = tensor<string, []>("op_2708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2709_cast_fp16 = softmax(axis = var_2545, x = aw_377_cast_fp16)[name = tensor<string, []>("op_2709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2710_cast_fp16 = softmax(axis = var_2545, x = aw_379_cast_fp16)[name = tensor<string, []>("op_2710_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2711_cast_fp16 = softmax(axis = var_2545, x = aw_381_cast_fp16)[name = tensor<string, []>("op_2711_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2712_cast_fp16 = softmax(axis = var_2545, x = aw_383_cast_fp16)[name = tensor<string, []>("op_2712_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2713_cast_fp16 = softmax(axis = var_2545, x = aw_385_cast_fp16)[name = tensor<string, []>("op_2713_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2714_cast_fp16 = softmax(axis = var_2545, x = aw_387_cast_fp16)[name = tensor<string, []>("op_2714_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2715_cast_fp16 = softmax(axis = var_2545, x = aw_389_cast_fp16)[name = tensor<string, []>("op_2715_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2716_cast_fp16 = softmax(axis = var_2545, x = aw_391_cast_fp16)[name = tensor<string, []>("op_2716_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2717_cast_fp16 = softmax(axis = var_2545, x = aw_393_cast_fp16)[name = tensor<string, []>("op_2717_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2718_cast_fp16 = softmax(axis = var_2545, x = aw_395_cast_fp16)[name = tensor<string, []>("op_2718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2719_cast_fp16 = softmax(axis = var_2545, x = aw_397_cast_fp16)[name = tensor<string, []>("op_2719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2720_cast_fp16 = softmax(axis = var_2545, x = aw_399_cast_fp16)[name = tensor<string, []>("op_2720_cast_fp16")];
+            tensor<string, []> var_2722_equation_0 = const()[name = tensor<string, []>("op_2722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2640_cast_fp16_0, var_2701_cast_fp16))[name = tensor<string, []>("op_2722_cast_fp16")];
+            tensor<string, []> var_2724_equation_0 = const()[name = tensor<string, []>("op_2724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2640_cast_fp16_1, var_2702_cast_fp16))[name = tensor<string, []>("op_2724_cast_fp16")];
+            tensor<string, []> var_2726_equation_0 = const()[name = tensor<string, []>("op_2726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2640_cast_fp16_2, var_2703_cast_fp16))[name = tensor<string, []>("op_2726_cast_fp16")];
+            tensor<string, []> var_2728_equation_0 = const()[name = tensor<string, []>("op_2728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2728_cast_fp16 = einsum(equation = var_2728_equation_0, values = (var_2640_cast_fp16_3, var_2704_cast_fp16))[name = tensor<string, []>("op_2728_cast_fp16")];
+            tensor<string, []> var_2730_equation_0 = const()[name = tensor<string, []>("op_2730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2730_cast_fp16 = einsum(equation = var_2730_equation_0, values = (var_2640_cast_fp16_4, var_2705_cast_fp16))[name = tensor<string, []>("op_2730_cast_fp16")];
+            tensor<string, []> var_2732_equation_0 = const()[name = tensor<string, []>("op_2732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2732_cast_fp16 = einsum(equation = var_2732_equation_0, values = (var_2640_cast_fp16_5, var_2706_cast_fp16))[name = tensor<string, []>("op_2732_cast_fp16")];
+            tensor<string, []> var_2734_equation_0 = const()[name = tensor<string, []>("op_2734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2734_cast_fp16 = einsum(equation = var_2734_equation_0, values = (var_2640_cast_fp16_6, var_2707_cast_fp16))[name = tensor<string, []>("op_2734_cast_fp16")];
+            tensor<string, []> var_2736_equation_0 = const()[name = tensor<string, []>("op_2736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2736_cast_fp16 = einsum(equation = var_2736_equation_0, values = (var_2640_cast_fp16_7, var_2708_cast_fp16))[name = tensor<string, []>("op_2736_cast_fp16")];
+            tensor<string, []> var_2738_equation_0 = const()[name = tensor<string, []>("op_2738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2738_cast_fp16 = einsum(equation = var_2738_equation_0, values = (var_2640_cast_fp16_8, var_2709_cast_fp16))[name = tensor<string, []>("op_2738_cast_fp16")];
+            tensor<string, []> var_2740_equation_0 = const()[name = tensor<string, []>("op_2740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2740_cast_fp16 = einsum(equation = var_2740_equation_0, values = (var_2640_cast_fp16_9, var_2710_cast_fp16))[name = tensor<string, []>("op_2740_cast_fp16")];
+            tensor<string, []> var_2742_equation_0 = const()[name = tensor<string, []>("op_2742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2742_cast_fp16 = einsum(equation = var_2742_equation_0, values = (var_2640_cast_fp16_10, var_2711_cast_fp16))[name = tensor<string, []>("op_2742_cast_fp16")];
+            tensor<string, []> var_2744_equation_0 = const()[name = tensor<string, []>("op_2744_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2744_cast_fp16 = einsum(equation = var_2744_equation_0, values = (var_2640_cast_fp16_11, var_2712_cast_fp16))[name = tensor<string, []>("op_2744_cast_fp16")];
+            tensor<string, []> var_2746_equation_0 = const()[name = tensor<string, []>("op_2746_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2746_cast_fp16 = einsum(equation = var_2746_equation_0, values = (var_2640_cast_fp16_12, var_2713_cast_fp16))[name = tensor<string, []>("op_2746_cast_fp16")];
+            tensor<string, []> var_2748_equation_0 = const()[name = tensor<string, []>("op_2748_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2748_cast_fp16 = einsum(equation = var_2748_equation_0, values = (var_2640_cast_fp16_13, var_2714_cast_fp16))[name = tensor<string, []>("op_2748_cast_fp16")];
+            tensor<string, []> var_2750_equation_0 = const()[name = tensor<string, []>("op_2750_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2750_cast_fp16 = einsum(equation = var_2750_equation_0, values = (var_2640_cast_fp16_14, var_2715_cast_fp16))[name = tensor<string, []>("op_2750_cast_fp16")];
+            tensor<string, []> var_2752_equation_0 = const()[name = tensor<string, []>("op_2752_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2752_cast_fp16 = einsum(equation = var_2752_equation_0, values = (var_2640_cast_fp16_15, var_2716_cast_fp16))[name = tensor<string, []>("op_2752_cast_fp16")];
+            tensor<string, []> var_2754_equation_0 = const()[name = tensor<string, []>("op_2754_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2754_cast_fp16 = einsum(equation = var_2754_equation_0, values = (var_2640_cast_fp16_16, var_2717_cast_fp16))[name = tensor<string, []>("op_2754_cast_fp16")];
+            tensor<string, []> var_2756_equation_0 = const()[name = tensor<string, []>("op_2756_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2756_cast_fp16 = einsum(equation = var_2756_equation_0, values = (var_2640_cast_fp16_17, var_2718_cast_fp16))[name = tensor<string, []>("op_2756_cast_fp16")];
+            tensor<string, []> var_2758_equation_0 = const()[name = tensor<string, []>("op_2758_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2758_cast_fp16 = einsum(equation = var_2758_equation_0, values = (var_2640_cast_fp16_18, var_2719_cast_fp16))[name = tensor<string, []>("op_2758_cast_fp16")];
+            tensor<string, []> var_2760_equation_0 = const()[name = tensor<string, []>("op_2760_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2760_cast_fp16 = einsum(equation = var_2760_equation_0, values = (var_2640_cast_fp16_19, var_2720_cast_fp16))[name = tensor<string, []>("op_2760_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_95_cast_fp16 = concat(axis = var_2545, interleave = input_95_interleave_0, values = (var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16, var_2728_cast_fp16, var_2730_cast_fp16, var_2732_cast_fp16, var_2734_cast_fp16, var_2736_cast_fp16, var_2738_cast_fp16, var_2740_cast_fp16, var_2742_cast_fp16, var_2744_cast_fp16, var_2746_cast_fp16, var_2748_cast_fp16, var_2750_cast_fp16, var_2752_cast_fp16, var_2754_cast_fp16, var_2756_cast_fp16, var_2758_cast_fp16, var_2760_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2769_pad_type_0 = const()[name = tensor<string, []>("op_2769_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2769_strides_0 = const()[name = tensor<string, []>("op_2769_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2769_pad_0 = const()[name = tensor<string, []>("op_2769_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2769_dilations_0 = const()[name = tensor<string, []>("op_2769_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2769_groups_0 = const()[name = tensor<string, []>("op_2769_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(378679552)))];
+            tensor<fp16, [1280]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381956416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2769_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2769_dilations_0, groups = var_2769_groups_0, pad = var_2769_pad_0, pad_type = var_2769_pad_type_0, strides = var_2769_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2769_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2769_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381959040)))];
+            tensor<fp16, [1280]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381961664)))];
+            tensor<fp16, []> var_2779_to_fp16 = const()[name = tensor<string, []>("op_2779_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2779_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(381964288)))];
+            tensor<fp16, [5120]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(395071552)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2805_pad_type_0 = const()[name = tensor<string, []>("op_2805_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2805_strides_0 = const()[name = tensor<string, []>("op_2805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2805_pad_0 = const()[name = tensor<string, []>("op_2805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2805_dilations_0 = const()[name = tensor<string, []>("op_2805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2805_groups_0 = const()[name = tensor<string, []>("op_2805_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(395081856)))];
+            tensor<fp16, [1280]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(408189120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2805_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2805_dilations_0, groups = var_2805_groups_0, pad = var_2805_pad_0, pad_type = var_2805_pad_type_0, strides = var_2805_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2805_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2805_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2814 = const()[name = tensor<string, []>("op_2814"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(408191744)))];
+            tensor<fp16, [1280]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(408194368)))];
+            tensor<fp16, []> var_2830_to_fp16 = const()[name = tensor<string, []>("op_2830_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2830_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_2865_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2865_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(408196992)))];
+            tensor<fp16, [1280]> var_2865_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2865_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411473856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2865_cast_fp16 = conv(bias = var_2865_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2865_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(411476480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2863_pad_type_0 = const()[name = tensor<string, []>("op_2863_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2863_strides_0 = const()[name = tensor<string, []>("op_2863_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2863_pad_0 = const()[name = tensor<string, []>("op_2863_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2863_dilations_0 = const()[name = tensor<string, []>("op_2863_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2863_groups_0 = const()[name = tensor<string, []>("op_2863_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(414753344)))];
+            tensor<fp16, [1280]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(418030208)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2863_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2863_dilations_0, groups = var_2863_groups_0, pad = var_2863_pad_0, pad_type = var_2863_pad_type_0, strides = var_2863_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<int32, [20]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2866_axis_0 = const()[name = tensor<string, []>("op_2866_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2866_cast_fp16_19 = split(axis = var_2866_axis_0, split_sizes = tile_30, x = var_2865_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<int32, [4]> var_2887_perm_0 = const()[name = tensor<string, []>("op_2887_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2888_axis_0 = const()[name = tensor<string, []>("op_2888_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_2887_cast_fp16 = transpose(perm = var_2887_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_22")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_2888_cast_fp16_19 = split(axis = var_2888_axis_0, split_sizes = tile_31, x = var_2887_cast_fp16)[name = tensor<string, []>("op_2888_cast_fp16")];
+            tensor<int32, [20]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2909_axis_0 = const()[name = tensor<string, []>("op_2909_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_2909_cast_fp16_19 = split(axis = var_2909_axis_0, split_sizes = tile_32, x = var_2863_cast_fp16)[name = tensor<string, []>("op_2909_cast_fp16")];
+            tensor<string, []> aw_401_equation_0 = const()[name = tensor<string, []>("aw_401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_401_cast_fp16 = einsum(equation = aw_401_equation_0, values = (var_2888_cast_fp16_0, var_2866_cast_fp16_0))[name = tensor<string, []>("aw_401_cast_fp16")];
+            tensor<string, []> aw_403_equation_0 = const()[name = tensor<string, []>("aw_403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_403_cast_fp16 = einsum(equation = aw_403_equation_0, values = (var_2888_cast_fp16_1, var_2866_cast_fp16_1))[name = tensor<string, []>("aw_403_cast_fp16")];
+            tensor<string, []> aw_405_equation_0 = const()[name = tensor<string, []>("aw_405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_405_cast_fp16 = einsum(equation = aw_405_equation_0, values = (var_2888_cast_fp16_2, var_2866_cast_fp16_2))[name = tensor<string, []>("aw_405_cast_fp16")];
+            tensor<string, []> aw_407_equation_0 = const()[name = tensor<string, []>("aw_407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_407_cast_fp16 = einsum(equation = aw_407_equation_0, values = (var_2888_cast_fp16_3, var_2866_cast_fp16_3))[name = tensor<string, []>("aw_407_cast_fp16")];
+            tensor<string, []> aw_409_equation_0 = const()[name = tensor<string, []>("aw_409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_409_cast_fp16 = einsum(equation = aw_409_equation_0, values = (var_2888_cast_fp16_4, var_2866_cast_fp16_4))[name = tensor<string, []>("aw_409_cast_fp16")];
+            tensor<string, []> aw_411_equation_0 = const()[name = tensor<string, []>("aw_411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_411_cast_fp16 = einsum(equation = aw_411_equation_0, values = (var_2888_cast_fp16_5, var_2866_cast_fp16_5))[name = tensor<string, []>("aw_411_cast_fp16")];
+            tensor<string, []> aw_413_equation_0 = const()[name = tensor<string, []>("aw_413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_413_cast_fp16 = einsum(equation = aw_413_equation_0, values = (var_2888_cast_fp16_6, var_2866_cast_fp16_6))[name = tensor<string, []>("aw_413_cast_fp16")];
+            tensor<string, []> aw_415_equation_0 = const()[name = tensor<string, []>("aw_415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_415_cast_fp16 = einsum(equation = aw_415_equation_0, values = (var_2888_cast_fp16_7, var_2866_cast_fp16_7))[name = tensor<string, []>("aw_415_cast_fp16")];
+            tensor<string, []> aw_417_equation_0 = const()[name = tensor<string, []>("aw_417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_417_cast_fp16 = einsum(equation = aw_417_equation_0, values = (var_2888_cast_fp16_8, var_2866_cast_fp16_8))[name = tensor<string, []>("aw_417_cast_fp16")];
+            tensor<string, []> aw_419_equation_0 = const()[name = tensor<string, []>("aw_419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_419_cast_fp16 = einsum(equation = aw_419_equation_0, values = (var_2888_cast_fp16_9, var_2866_cast_fp16_9))[name = tensor<string, []>("aw_419_cast_fp16")];
+            tensor<string, []> aw_421_equation_0 = const()[name = tensor<string, []>("aw_421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_421_cast_fp16 = einsum(equation = aw_421_equation_0, values = (var_2888_cast_fp16_10, var_2866_cast_fp16_10))[name = tensor<string, []>("aw_421_cast_fp16")];
+            tensor<string, []> aw_423_equation_0 = const()[name = tensor<string, []>("aw_423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_423_cast_fp16 = einsum(equation = aw_423_equation_0, values = (var_2888_cast_fp16_11, var_2866_cast_fp16_11))[name = tensor<string, []>("aw_423_cast_fp16")];
+            tensor<string, []> aw_425_equation_0 = const()[name = tensor<string, []>("aw_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_425_cast_fp16 = einsum(equation = aw_425_equation_0, values = (var_2888_cast_fp16_12, var_2866_cast_fp16_12))[name = tensor<string, []>("aw_425_cast_fp16")];
+            tensor<string, []> aw_427_equation_0 = const()[name = tensor<string, []>("aw_427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_427_cast_fp16 = einsum(equation = aw_427_equation_0, values = (var_2888_cast_fp16_13, var_2866_cast_fp16_13))[name = tensor<string, []>("aw_427_cast_fp16")];
+            tensor<string, []> aw_429_equation_0 = const()[name = tensor<string, []>("aw_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_429_cast_fp16 = einsum(equation = aw_429_equation_0, values = (var_2888_cast_fp16_14, var_2866_cast_fp16_14))[name = tensor<string, []>("aw_429_cast_fp16")];
+            tensor<string, []> aw_431_equation_0 = const()[name = tensor<string, []>("aw_431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_431_cast_fp16 = einsum(equation = aw_431_equation_0, values = (var_2888_cast_fp16_15, var_2866_cast_fp16_15))[name = tensor<string, []>("aw_431_cast_fp16")];
+            tensor<string, []> aw_433_equation_0 = const()[name = tensor<string, []>("aw_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_433_cast_fp16 = einsum(equation = aw_433_equation_0, values = (var_2888_cast_fp16_16, var_2866_cast_fp16_16))[name = tensor<string, []>("aw_433_cast_fp16")];
+            tensor<string, []> aw_435_equation_0 = const()[name = tensor<string, []>("aw_435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_435_cast_fp16 = einsum(equation = aw_435_equation_0, values = (var_2888_cast_fp16_17, var_2866_cast_fp16_17))[name = tensor<string, []>("aw_435_cast_fp16")];
+            tensor<string, []> aw_437_equation_0 = const()[name = tensor<string, []>("aw_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_437_cast_fp16 = einsum(equation = aw_437_equation_0, values = (var_2888_cast_fp16_18, var_2866_cast_fp16_18))[name = tensor<string, []>("aw_437_cast_fp16")];
+            tensor<string, []> aw_439_equation_0 = const()[name = tensor<string, []>("aw_439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_439_cast_fp16 = einsum(equation = aw_439_equation_0, values = (var_2888_cast_fp16_19, var_2866_cast_fp16_19))[name = tensor<string, []>("aw_439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2970_cast_fp16 = softmax(axis = var_2814, x = aw_401_cast_fp16)[name = tensor<string, []>("op_2970_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2971_cast_fp16 = softmax(axis = var_2814, x = aw_403_cast_fp16)[name = tensor<string, []>("op_2971_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2972_cast_fp16 = softmax(axis = var_2814, x = aw_405_cast_fp16)[name = tensor<string, []>("op_2972_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2973_cast_fp16 = softmax(axis = var_2814, x = aw_407_cast_fp16)[name = tensor<string, []>("op_2973_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2974_cast_fp16 = softmax(axis = var_2814, x = aw_409_cast_fp16)[name = tensor<string, []>("op_2974_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2975_cast_fp16 = softmax(axis = var_2814, x = aw_411_cast_fp16)[name = tensor<string, []>("op_2975_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2976_cast_fp16 = softmax(axis = var_2814, x = aw_413_cast_fp16)[name = tensor<string, []>("op_2976_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2977_cast_fp16 = softmax(axis = var_2814, x = aw_415_cast_fp16)[name = tensor<string, []>("op_2977_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2978_cast_fp16 = softmax(axis = var_2814, x = aw_417_cast_fp16)[name = tensor<string, []>("op_2978_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2979_cast_fp16 = softmax(axis = var_2814, x = aw_419_cast_fp16)[name = tensor<string, []>("op_2979_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2980_cast_fp16 = softmax(axis = var_2814, x = aw_421_cast_fp16)[name = tensor<string, []>("op_2980_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2981_cast_fp16 = softmax(axis = var_2814, x = aw_423_cast_fp16)[name = tensor<string, []>("op_2981_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2982_cast_fp16 = softmax(axis = var_2814, x = aw_425_cast_fp16)[name = tensor<string, []>("op_2982_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2983_cast_fp16 = softmax(axis = var_2814, x = aw_427_cast_fp16)[name = tensor<string, []>("op_2983_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2984_cast_fp16 = softmax(axis = var_2814, x = aw_429_cast_fp16)[name = tensor<string, []>("op_2984_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2985_cast_fp16 = softmax(axis = var_2814, x = aw_431_cast_fp16)[name = tensor<string, []>("op_2985_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2986_cast_fp16 = softmax(axis = var_2814, x = aw_433_cast_fp16)[name = tensor<string, []>("op_2986_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2987_cast_fp16 = softmax(axis = var_2814, x = aw_435_cast_fp16)[name = tensor<string, []>("op_2987_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2988_cast_fp16 = softmax(axis = var_2814, x = aw_437_cast_fp16)[name = tensor<string, []>("op_2988_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2989_cast_fp16 = softmax(axis = var_2814, x = aw_439_cast_fp16)[name = tensor<string, []>("op_2989_cast_fp16")];
+            tensor<string, []> var_2991_equation_0 = const()[name = tensor<string, []>("op_2991_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2991_cast_fp16 = einsum(equation = var_2991_equation_0, values = (var_2909_cast_fp16_0, var_2970_cast_fp16))[name = tensor<string, []>("op_2991_cast_fp16")];
+            tensor<string, []> var_2993_equation_0 = const()[name = tensor<string, []>("op_2993_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2993_cast_fp16 = einsum(equation = var_2993_equation_0, values = (var_2909_cast_fp16_1, var_2971_cast_fp16))[name = tensor<string, []>("op_2993_cast_fp16")];
+            tensor<string, []> var_2995_equation_0 = const()[name = tensor<string, []>("op_2995_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2995_cast_fp16 = einsum(equation = var_2995_equation_0, values = (var_2909_cast_fp16_2, var_2972_cast_fp16))[name = tensor<string, []>("op_2995_cast_fp16")];
+            tensor<string, []> var_2997_equation_0 = const()[name = tensor<string, []>("op_2997_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2997_cast_fp16 = einsum(equation = var_2997_equation_0, values = (var_2909_cast_fp16_3, var_2973_cast_fp16))[name = tensor<string, []>("op_2997_cast_fp16")];
+            tensor<string, []> var_2999_equation_0 = const()[name = tensor<string, []>("op_2999_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2999_cast_fp16 = einsum(equation = var_2999_equation_0, values = (var_2909_cast_fp16_4, var_2974_cast_fp16))[name = tensor<string, []>("op_2999_cast_fp16")];
+            tensor<string, []> var_3001_equation_0 = const()[name = tensor<string, []>("op_3001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3001_cast_fp16 = einsum(equation = var_3001_equation_0, values = (var_2909_cast_fp16_5, var_2975_cast_fp16))[name = tensor<string, []>("op_3001_cast_fp16")];
+            tensor<string, []> var_3003_equation_0 = const()[name = tensor<string, []>("op_3003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3003_cast_fp16 = einsum(equation = var_3003_equation_0, values = (var_2909_cast_fp16_6, var_2976_cast_fp16))[name = tensor<string, []>("op_3003_cast_fp16")];
+            tensor<string, []> var_3005_equation_0 = const()[name = tensor<string, []>("op_3005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3005_cast_fp16 = einsum(equation = var_3005_equation_0, values = (var_2909_cast_fp16_7, var_2977_cast_fp16))[name = tensor<string, []>("op_3005_cast_fp16")];
+            tensor<string, []> var_3007_equation_0 = const()[name = tensor<string, []>("op_3007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3007_cast_fp16 = einsum(equation = var_3007_equation_0, values = (var_2909_cast_fp16_8, var_2978_cast_fp16))[name = tensor<string, []>("op_3007_cast_fp16")];
+            tensor<string, []> var_3009_equation_0 = const()[name = tensor<string, []>("op_3009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3009_cast_fp16 = einsum(equation = var_3009_equation_0, values = (var_2909_cast_fp16_9, var_2979_cast_fp16))[name = tensor<string, []>("op_3009_cast_fp16")];
+            tensor<string, []> var_3011_equation_0 = const()[name = tensor<string, []>("op_3011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3011_cast_fp16 = einsum(equation = var_3011_equation_0, values = (var_2909_cast_fp16_10, var_2980_cast_fp16))[name = tensor<string, []>("op_3011_cast_fp16")];
+            tensor<string, []> var_3013_equation_0 = const()[name = tensor<string, []>("op_3013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3013_cast_fp16 = einsum(equation = var_3013_equation_0, values = (var_2909_cast_fp16_11, var_2981_cast_fp16))[name = tensor<string, []>("op_3013_cast_fp16")];
+            tensor<string, []> var_3015_equation_0 = const()[name = tensor<string, []>("op_3015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3015_cast_fp16 = einsum(equation = var_3015_equation_0, values = (var_2909_cast_fp16_12, var_2982_cast_fp16))[name = tensor<string, []>("op_3015_cast_fp16")];
+            tensor<string, []> var_3017_equation_0 = const()[name = tensor<string, []>("op_3017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3017_cast_fp16 = einsum(equation = var_3017_equation_0, values = (var_2909_cast_fp16_13, var_2983_cast_fp16))[name = tensor<string, []>("op_3017_cast_fp16")];
+            tensor<string, []> var_3019_equation_0 = const()[name = tensor<string, []>("op_3019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3019_cast_fp16 = einsum(equation = var_3019_equation_0, values = (var_2909_cast_fp16_14, var_2984_cast_fp16))[name = tensor<string, []>("op_3019_cast_fp16")];
+            tensor<string, []> var_3021_equation_0 = const()[name = tensor<string, []>("op_3021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3021_cast_fp16 = einsum(equation = var_3021_equation_0, values = (var_2909_cast_fp16_15, var_2985_cast_fp16))[name = tensor<string, []>("op_3021_cast_fp16")];
+            tensor<string, []> var_3023_equation_0 = const()[name = tensor<string, []>("op_3023_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3023_cast_fp16 = einsum(equation = var_3023_equation_0, values = (var_2909_cast_fp16_16, var_2986_cast_fp16))[name = tensor<string, []>("op_3023_cast_fp16")];
+            tensor<string, []> var_3025_equation_0 = const()[name = tensor<string, []>("op_3025_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3025_cast_fp16 = einsum(equation = var_3025_equation_0, values = (var_2909_cast_fp16_17, var_2987_cast_fp16))[name = tensor<string, []>("op_3025_cast_fp16")];
+            tensor<string, []> var_3027_equation_0 = const()[name = tensor<string, []>("op_3027_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3027_cast_fp16 = einsum(equation = var_3027_equation_0, values = (var_2909_cast_fp16_18, var_2988_cast_fp16))[name = tensor<string, []>("op_3027_cast_fp16")];
+            tensor<string, []> var_3029_equation_0 = const()[name = tensor<string, []>("op_3029_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3029_cast_fp16 = einsum(equation = var_3029_equation_0, values = (var_2909_cast_fp16_19, var_2989_cast_fp16))[name = tensor<string, []>("op_3029_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2814, interleave = input_105_interleave_0, values = (var_2991_cast_fp16, var_2993_cast_fp16, var_2995_cast_fp16, var_2997_cast_fp16, var_2999_cast_fp16, var_3001_cast_fp16, var_3003_cast_fp16, var_3005_cast_fp16, var_3007_cast_fp16, var_3009_cast_fp16, var_3011_cast_fp16, var_3013_cast_fp16, var_3015_cast_fp16, var_3017_cast_fp16, var_3019_cast_fp16, var_3021_cast_fp16, var_3023_cast_fp16, var_3025_cast_fp16, var_3027_cast_fp16, var_3029_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_3038_pad_type_0 = const()[name = tensor<string, []>("op_3038_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3038_strides_0 = const()[name = tensor<string, []>("op_3038_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3038_pad_0 = const()[name = tensor<string, []>("op_3038_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3038_dilations_0 = const()[name = tensor<string, []>("op_3038_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3038_groups_0 = const()[name = tensor<string, []>("op_3038_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(418032832)))];
+            tensor<fp16, [1280]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421309696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3038_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_3038_dilations_0, groups = var_3038_groups_0, pad = var_3038_pad_0, pad_type = var_3038_pad_type_0, strides = var_3038_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_3038_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_3038_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421312320)))];
+            tensor<fp16, [1280]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421314944)))];
+            tensor<fp16, []> var_3048_to_fp16 = const()[name = tensor<string, []>("op_3048_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_3048_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421317568)))];
+            tensor<fp16, [5120]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434424832)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_3074_pad_type_0 = const()[name = tensor<string, []>("op_3074_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3074_strides_0 = const()[name = tensor<string, []>("op_3074_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3074_pad_0 = const()[name = tensor<string, []>("op_3074_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3074_dilations_0 = const()[name = tensor<string, []>("op_3074_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3074_groups_0 = const()[name = tensor<string, []>("op_3074_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(434435136)))];
+            tensor<fp16, [1280]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447542400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3074_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_3074_dilations_0, groups = var_3074_groups_0, pad = var_3074_pad_0, pad_type = var_3074_pad_type_0, strides = var_3074_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_3074_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_3074_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_3083 = const()[name = tensor<string, []>("op_3083"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447545024)))];
+            tensor<fp16, [1280]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447547648)))];
+            tensor<fp16, []> var_3099_to_fp16 = const()[name = tensor<string, []>("op_3099_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_3099_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_23_pad_type_0 = const()[name = tensor<string, []>("q_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_23_strides_0 = const()[name = tensor<string, []>("q_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_23_pad_0 = const()[name = tensor<string, []>("q_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_23_dilations_0 = const()[name = tensor<string, []>("q_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_23_groups_0 = const()[name = tensor<string, []>("q_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3134_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3134_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(447550272)))];
+            tensor<fp16, [1280]> var_3134_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3134_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450827136)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3134_cast_fp16 = conv(bias = var_3134_bias_0_to_fp16, dilations = q_23_dilations_0, groups = q_23_groups_0, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = q_23_strides_0, weight = var_3134_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3134_cast_fp16")];
+            tensor<string, []> k_23_pad_type_0 = const()[name = tensor<string, []>("k_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_23_strides_0 = const()[name = tensor<string, []>("k_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_23_pad_0 = const()[name = tensor<string, []>("k_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_23_dilations_0 = const()[name = tensor<string, []>("k_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_23_groups_0 = const()[name = tensor<string, []>("k_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(450829760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_23_cast_fp16 = conv(dilations = k_23_dilations_0, groups = k_23_groups_0, pad = k_23_pad_0, pad_type = k_23_pad_type_0, strides = k_23_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_23_cast_fp16")];
+            tensor<string, []> var_3132_pad_type_0 = const()[name = tensor<string, []>("op_3132_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3132_strides_0 = const()[name = tensor<string, []>("op_3132_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3132_pad_0 = const()[name = tensor<string, []>("op_3132_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3132_dilations_0 = const()[name = tensor<string, []>("op_3132_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3132_groups_0 = const()[name = tensor<string, []>("op_3132_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(454106624)))];
+            tensor<fp16, [1280]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457383488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3132_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_3132_dilations_0, groups = var_3132_groups_0, pad = var_3132_pad_0, pad_type = var_3132_pad_type_0, strides = var_3132_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_3132_cast_fp16")];
+            tensor<int32, [20]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3135_axis_0 = const()[name = tensor<string, []>("op_3135_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16_19 = split(axis = var_3135_axis_0, split_sizes = tile_33, x = var_3134_cast_fp16)[name = tensor<string, []>("op_3135_cast_fp16")];
+            tensor<int32, [4]> var_3156_perm_0 = const()[name = tensor<string, []>("op_3156_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3157_axis_0 = const()[name = tensor<string, []>("op_3157_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3156_cast_fp16 = transpose(perm = var_3156_perm_0, x = k_23_cast_fp16)[name = tensor<string, []>("transpose_21")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3157_cast_fp16_19 = split(axis = var_3157_axis_0, split_sizes = tile_34, x = var_3156_cast_fp16)[name = tensor<string, []>("op_3157_cast_fp16")];
+            tensor<int32, [20]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3178_axis_0 = const()[name = tensor<string, []>("op_3178_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3178_cast_fp16_19 = split(axis = var_3178_axis_0, split_sizes = tile_35, x = var_3132_cast_fp16)[name = tensor<string, []>("op_3178_cast_fp16")];
+            tensor<string, []> aw_441_equation_0 = const()[name = tensor<string, []>("aw_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_441_cast_fp16 = einsum(equation = aw_441_equation_0, values = (var_3157_cast_fp16_0, var_3135_cast_fp16_0))[name = tensor<string, []>("aw_441_cast_fp16")];
+            tensor<string, []> aw_443_equation_0 = const()[name = tensor<string, []>("aw_443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_443_cast_fp16 = einsum(equation = aw_443_equation_0, values = (var_3157_cast_fp16_1, var_3135_cast_fp16_1))[name = tensor<string, []>("aw_443_cast_fp16")];
+            tensor<string, []> aw_445_equation_0 = const()[name = tensor<string, []>("aw_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_445_cast_fp16 = einsum(equation = aw_445_equation_0, values = (var_3157_cast_fp16_2, var_3135_cast_fp16_2))[name = tensor<string, []>("aw_445_cast_fp16")];
+            tensor<string, []> aw_447_equation_0 = const()[name = tensor<string, []>("aw_447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_447_cast_fp16 = einsum(equation = aw_447_equation_0, values = (var_3157_cast_fp16_3, var_3135_cast_fp16_3))[name = tensor<string, []>("aw_447_cast_fp16")];
+            tensor<string, []> aw_449_equation_0 = const()[name = tensor<string, []>("aw_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_449_cast_fp16 = einsum(equation = aw_449_equation_0, values = (var_3157_cast_fp16_4, var_3135_cast_fp16_4))[name = tensor<string, []>("aw_449_cast_fp16")];
+            tensor<string, []> aw_451_equation_0 = const()[name = tensor<string, []>("aw_451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_451_cast_fp16 = einsum(equation = aw_451_equation_0, values = (var_3157_cast_fp16_5, var_3135_cast_fp16_5))[name = tensor<string, []>("aw_451_cast_fp16")];
+            tensor<string, []> aw_453_equation_0 = const()[name = tensor<string, []>("aw_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_453_cast_fp16 = einsum(equation = aw_453_equation_0, values = (var_3157_cast_fp16_6, var_3135_cast_fp16_6))[name = tensor<string, []>("aw_453_cast_fp16")];
+            tensor<string, []> aw_455_equation_0 = const()[name = tensor<string, []>("aw_455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_455_cast_fp16 = einsum(equation = aw_455_equation_0, values = (var_3157_cast_fp16_7, var_3135_cast_fp16_7))[name = tensor<string, []>("aw_455_cast_fp16")];
+            tensor<string, []> aw_457_equation_0 = const()[name = tensor<string, []>("aw_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_457_cast_fp16 = einsum(equation = aw_457_equation_0, values = (var_3157_cast_fp16_8, var_3135_cast_fp16_8))[name = tensor<string, []>("aw_457_cast_fp16")];
+            tensor<string, []> aw_459_equation_0 = const()[name = tensor<string, []>("aw_459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_459_cast_fp16 = einsum(equation = aw_459_equation_0, values = (var_3157_cast_fp16_9, var_3135_cast_fp16_9))[name = tensor<string, []>("aw_459_cast_fp16")];
+            tensor<string, []> aw_461_equation_0 = const()[name = tensor<string, []>("aw_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_461_cast_fp16 = einsum(equation = aw_461_equation_0, values = (var_3157_cast_fp16_10, var_3135_cast_fp16_10))[name = tensor<string, []>("aw_461_cast_fp16")];
+            tensor<string, []> aw_463_equation_0 = const()[name = tensor<string, []>("aw_463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_463_cast_fp16 = einsum(equation = aw_463_equation_0, values = (var_3157_cast_fp16_11, var_3135_cast_fp16_11))[name = tensor<string, []>("aw_463_cast_fp16")];
+            tensor<string, []> aw_465_equation_0 = const()[name = tensor<string, []>("aw_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_465_cast_fp16 = einsum(equation = aw_465_equation_0, values = (var_3157_cast_fp16_12, var_3135_cast_fp16_12))[name = tensor<string, []>("aw_465_cast_fp16")];
+            tensor<string, []> aw_467_equation_0 = const()[name = tensor<string, []>("aw_467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_467_cast_fp16 = einsum(equation = aw_467_equation_0, values = (var_3157_cast_fp16_13, var_3135_cast_fp16_13))[name = tensor<string, []>("aw_467_cast_fp16")];
+            tensor<string, []> aw_469_equation_0 = const()[name = tensor<string, []>("aw_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_469_cast_fp16 = einsum(equation = aw_469_equation_0, values = (var_3157_cast_fp16_14, var_3135_cast_fp16_14))[name = tensor<string, []>("aw_469_cast_fp16")];
+            tensor<string, []> aw_471_equation_0 = const()[name = tensor<string, []>("aw_471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_471_cast_fp16 = einsum(equation = aw_471_equation_0, values = (var_3157_cast_fp16_15, var_3135_cast_fp16_15))[name = tensor<string, []>("aw_471_cast_fp16")];
+            tensor<string, []> aw_473_equation_0 = const()[name = tensor<string, []>("aw_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_473_cast_fp16 = einsum(equation = aw_473_equation_0, values = (var_3157_cast_fp16_16, var_3135_cast_fp16_16))[name = tensor<string, []>("aw_473_cast_fp16")];
+            tensor<string, []> aw_475_equation_0 = const()[name = tensor<string, []>("aw_475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_475_cast_fp16 = einsum(equation = aw_475_equation_0, values = (var_3157_cast_fp16_17, var_3135_cast_fp16_17))[name = tensor<string, []>("aw_475_cast_fp16")];
+            tensor<string, []> aw_477_equation_0 = const()[name = tensor<string, []>("aw_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_477_cast_fp16 = einsum(equation = aw_477_equation_0, values = (var_3157_cast_fp16_18, var_3135_cast_fp16_18))[name = tensor<string, []>("aw_477_cast_fp16")];
+            tensor<string, []> aw_479_equation_0 = const()[name = tensor<string, []>("aw_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_479_cast_fp16 = einsum(equation = aw_479_equation_0, values = (var_3157_cast_fp16_19, var_3135_cast_fp16_19))[name = tensor<string, []>("aw_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3239_cast_fp16 = softmax(axis = var_3083, x = aw_441_cast_fp16)[name = tensor<string, []>("op_3239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3240_cast_fp16 = softmax(axis = var_3083, x = aw_443_cast_fp16)[name = tensor<string, []>("op_3240_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3241_cast_fp16 = softmax(axis = var_3083, x = aw_445_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3242_cast_fp16 = softmax(axis = var_3083, x = aw_447_cast_fp16)[name = tensor<string, []>("op_3242_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3243_cast_fp16 = softmax(axis = var_3083, x = aw_449_cast_fp16)[name = tensor<string, []>("op_3243_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3244_cast_fp16 = softmax(axis = var_3083, x = aw_451_cast_fp16)[name = tensor<string, []>("op_3244_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3245_cast_fp16 = softmax(axis = var_3083, x = aw_453_cast_fp16)[name = tensor<string, []>("op_3245_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3246_cast_fp16 = softmax(axis = var_3083, x = aw_455_cast_fp16)[name = tensor<string, []>("op_3246_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3247_cast_fp16 = softmax(axis = var_3083, x = aw_457_cast_fp16)[name = tensor<string, []>("op_3247_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3248_cast_fp16 = softmax(axis = var_3083, x = aw_459_cast_fp16)[name = tensor<string, []>("op_3248_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3249_cast_fp16 = softmax(axis = var_3083, x = aw_461_cast_fp16)[name = tensor<string, []>("op_3249_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3250_cast_fp16 = softmax(axis = var_3083, x = aw_463_cast_fp16)[name = tensor<string, []>("op_3250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3251_cast_fp16 = softmax(axis = var_3083, x = aw_465_cast_fp16)[name = tensor<string, []>("op_3251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3252_cast_fp16 = softmax(axis = var_3083, x = aw_467_cast_fp16)[name = tensor<string, []>("op_3252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3253_cast_fp16 = softmax(axis = var_3083, x = aw_469_cast_fp16)[name = tensor<string, []>("op_3253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3254_cast_fp16 = softmax(axis = var_3083, x = aw_471_cast_fp16)[name = tensor<string, []>("op_3254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3255_cast_fp16 = softmax(axis = var_3083, x = aw_473_cast_fp16)[name = tensor<string, []>("op_3255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3256_cast_fp16 = softmax(axis = var_3083, x = aw_475_cast_fp16)[name = tensor<string, []>("op_3256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3257_cast_fp16 = softmax(axis = var_3083, x = aw_477_cast_fp16)[name = tensor<string, []>("op_3257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3258_cast_fp16 = softmax(axis = var_3083, x = aw_479_cast_fp16)[name = tensor<string, []>("op_3258_cast_fp16")];
+            tensor<string, []> var_3260_equation_0 = const()[name = tensor<string, []>("op_3260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3260_cast_fp16 = einsum(equation = var_3260_equation_0, values = (var_3178_cast_fp16_0, var_3239_cast_fp16))[name = tensor<string, []>("op_3260_cast_fp16")];
+            tensor<string, []> var_3262_equation_0 = const()[name = tensor<string, []>("op_3262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3262_cast_fp16 = einsum(equation = var_3262_equation_0, values = (var_3178_cast_fp16_1, var_3240_cast_fp16))[name = tensor<string, []>("op_3262_cast_fp16")];
+            tensor<string, []> var_3264_equation_0 = const()[name = tensor<string, []>("op_3264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3264_cast_fp16 = einsum(equation = var_3264_equation_0, values = (var_3178_cast_fp16_2, var_3241_cast_fp16))[name = tensor<string, []>("op_3264_cast_fp16")];
+            tensor<string, []> var_3266_equation_0 = const()[name = tensor<string, []>("op_3266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3266_cast_fp16 = einsum(equation = var_3266_equation_0, values = (var_3178_cast_fp16_3, var_3242_cast_fp16))[name = tensor<string, []>("op_3266_cast_fp16")];
+            tensor<string, []> var_3268_equation_0 = const()[name = tensor<string, []>("op_3268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3268_cast_fp16 = einsum(equation = var_3268_equation_0, values = (var_3178_cast_fp16_4, var_3243_cast_fp16))[name = tensor<string, []>("op_3268_cast_fp16")];
+            tensor<string, []> var_3270_equation_0 = const()[name = tensor<string, []>("op_3270_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3270_cast_fp16 = einsum(equation = var_3270_equation_0, values = (var_3178_cast_fp16_5, var_3244_cast_fp16))[name = tensor<string, []>("op_3270_cast_fp16")];
+            tensor<string, []> var_3272_equation_0 = const()[name = tensor<string, []>("op_3272_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3272_cast_fp16 = einsum(equation = var_3272_equation_0, values = (var_3178_cast_fp16_6, var_3245_cast_fp16))[name = tensor<string, []>("op_3272_cast_fp16")];
+            tensor<string, []> var_3274_equation_0 = const()[name = tensor<string, []>("op_3274_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3274_cast_fp16 = einsum(equation = var_3274_equation_0, values = (var_3178_cast_fp16_7, var_3246_cast_fp16))[name = tensor<string, []>("op_3274_cast_fp16")];
+            tensor<string, []> var_3276_equation_0 = const()[name = tensor<string, []>("op_3276_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16 = einsum(equation = var_3276_equation_0, values = (var_3178_cast_fp16_8, var_3247_cast_fp16))[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<string, []> var_3278_equation_0 = const()[name = tensor<string, []>("op_3278_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3278_cast_fp16 = einsum(equation = var_3278_equation_0, values = (var_3178_cast_fp16_9, var_3248_cast_fp16))[name = tensor<string, []>("op_3278_cast_fp16")];
+            tensor<string, []> var_3280_equation_0 = const()[name = tensor<string, []>("op_3280_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3280_cast_fp16 = einsum(equation = var_3280_equation_0, values = (var_3178_cast_fp16_10, var_3249_cast_fp16))[name = tensor<string, []>("op_3280_cast_fp16")];
+            tensor<string, []> var_3282_equation_0 = const()[name = tensor<string, []>("op_3282_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3282_cast_fp16 = einsum(equation = var_3282_equation_0, values = (var_3178_cast_fp16_11, var_3250_cast_fp16))[name = tensor<string, []>("op_3282_cast_fp16")];
+            tensor<string, []> var_3284_equation_0 = const()[name = tensor<string, []>("op_3284_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3284_cast_fp16 = einsum(equation = var_3284_equation_0, values = (var_3178_cast_fp16_12, var_3251_cast_fp16))[name = tensor<string, []>("op_3284_cast_fp16")];
+            tensor<string, []> var_3286_equation_0 = const()[name = tensor<string, []>("op_3286_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3286_cast_fp16 = einsum(equation = var_3286_equation_0, values = (var_3178_cast_fp16_13, var_3252_cast_fp16))[name = tensor<string, []>("op_3286_cast_fp16")];
+            tensor<string, []> var_3288_equation_0 = const()[name = tensor<string, []>("op_3288_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3288_cast_fp16 = einsum(equation = var_3288_equation_0, values = (var_3178_cast_fp16_14, var_3253_cast_fp16))[name = tensor<string, []>("op_3288_cast_fp16")];
+            tensor<string, []> var_3290_equation_0 = const()[name = tensor<string, []>("op_3290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3290_cast_fp16 = einsum(equation = var_3290_equation_0, values = (var_3178_cast_fp16_15, var_3254_cast_fp16))[name = tensor<string, []>("op_3290_cast_fp16")];
+            tensor<string, []> var_3292_equation_0 = const()[name = tensor<string, []>("op_3292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3292_cast_fp16 = einsum(equation = var_3292_equation_0, values = (var_3178_cast_fp16_16, var_3255_cast_fp16))[name = tensor<string, []>("op_3292_cast_fp16")];
+            tensor<string, []> var_3294_equation_0 = const()[name = tensor<string, []>("op_3294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3294_cast_fp16 = einsum(equation = var_3294_equation_0, values = (var_3178_cast_fp16_17, var_3256_cast_fp16))[name = tensor<string, []>("op_3294_cast_fp16")];
+            tensor<string, []> var_3296_equation_0 = const()[name = tensor<string, []>("op_3296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3296_cast_fp16 = einsum(equation = var_3296_equation_0, values = (var_3178_cast_fp16_18, var_3257_cast_fp16))[name = tensor<string, []>("op_3296_cast_fp16")];
+            tensor<string, []> var_3298_equation_0 = const()[name = tensor<string, []>("op_3298_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3298_cast_fp16 = einsum(equation = var_3298_equation_0, values = (var_3178_cast_fp16_19, var_3258_cast_fp16))[name = tensor<string, []>("op_3298_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_115_cast_fp16 = concat(axis = var_3083, interleave = input_115_interleave_0, values = (var_3260_cast_fp16, var_3262_cast_fp16, var_3264_cast_fp16, var_3266_cast_fp16, var_3268_cast_fp16, var_3270_cast_fp16, var_3272_cast_fp16, var_3274_cast_fp16, var_3276_cast_fp16, var_3278_cast_fp16, var_3280_cast_fp16, var_3282_cast_fp16, var_3284_cast_fp16, var_3286_cast_fp16, var_3288_cast_fp16, var_3290_cast_fp16, var_3292_cast_fp16, var_3294_cast_fp16, var_3296_cast_fp16, var_3298_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_3307_pad_type_0 = const()[name = tensor<string, []>("op_3307_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3307_strides_0 = const()[name = tensor<string, []>("op_3307_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3307_pad_0 = const()[name = tensor<string, []>("op_3307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3307_dilations_0 = const()[name = tensor<string, []>("op_3307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3307_groups_0 = const()[name = tensor<string, []>("op_3307_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(457386112)))];
+            tensor<fp16, [1280]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460662976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3307_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_3307_dilations_0, groups = var_3307_groups_0, pad = var_3307_pad_0, pad_type = var_3307_pad_type_0, strides = var_3307_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_3307_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_3307_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460665600)))];
+            tensor<fp16, [1280]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460668224)))];
+            tensor<fp16, []> var_3317_to_fp16 = const()[name = tensor<string, []>("op_3317_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_3317_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(460670848)))];
+            tensor<fp16, [5120]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473778112)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<string, []> var_3343_pad_type_0 = const()[name = tensor<string, []>("op_3343_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3343_strides_0 = const()[name = tensor<string, []>("op_3343_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3343_pad_0 = const()[name = tensor<string, []>("op_3343_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3343_dilations_0 = const()[name = tensor<string, []>("op_3343_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3343_groups_0 = const()[name = tensor<string, []>("op_3343_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(473788416)))];
+            tensor<fp16, [1280]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486895680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3343_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_3343_dilations_0, groups = var_3343_groups_0, pad = var_3343_pad_0, pad_type = var_3343_pad_type_0, strides = var_3343_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("op_3343_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_3343_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_3352 = const()[name = tensor<string, []>("op_3352"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_123_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486898304)))];
+            tensor<fp16, [1280]> input_123_beta_0_to_fp16 = const()[name = tensor<string, []>("input_123_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486900928)))];
+            tensor<fp16, []> var_3368_to_fp16 = const()[name = tensor<string, []>("op_3368_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = input_123_beta_0_to_fp16, epsilon = var_3368_to_fp16, gamma = input_123_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
+            tensor<string, []> q_25_pad_type_0 = const()[name = tensor<string, []>("q_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_25_strides_0 = const()[name = tensor<string, []>("q_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_25_pad_0 = const()[name = tensor<string, []>("q_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_25_dilations_0 = const()[name = tensor<string, []>("q_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_25_groups_0 = const()[name = tensor<string, []>("q_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3403_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3403_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(486903552)))];
+            tensor<fp16, [1280]> var_3403_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3403_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490180416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3403_cast_fp16 = conv(bias = var_3403_bias_0_to_fp16, dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = var_3403_weight_0_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3403_cast_fp16")];
+            tensor<string, []> k_25_pad_type_0 = const()[name = tensor<string, []>("k_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_25_strides_0 = const()[name = tensor<string, []>("k_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_25_pad_0 = const()[name = tensor<string, []>("k_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_25_dilations_0 = const()[name = tensor<string, []>("k_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_25_groups_0 = const()[name = tensor<string, []>("k_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490183040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_25_cast_fp16 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = blocks_12_attn_key_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
+            tensor<string, []> var_3401_pad_type_0 = const()[name = tensor<string, []>("op_3401_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3401_strides_0 = const()[name = tensor<string, []>("op_3401_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3401_pad_0 = const()[name = tensor<string, []>("op_3401_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3401_dilations_0 = const()[name = tensor<string, []>("op_3401_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3401_groups_0 = const()[name = tensor<string, []>("op_3401_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(493459904)))];
+            tensor<fp16, [1280]> blocks_12_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496736768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3401_cast_fp16 = conv(bias = blocks_12_attn_value_bias_to_fp16, dilations = var_3401_dilations_0, groups = var_3401_groups_0, pad = var_3401_pad_0, pad_type = var_3401_pad_type_0, strides = var_3401_strides_0, weight = blocks_12_attn_value_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3401_cast_fp16")];
+            tensor<int32, [20]> tile_36 = const()[name = tensor<string, []>("tile_36"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3404_axis_0 = const()[name = tensor<string, []>("op_3404_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3404_cast_fp16_19 = split(axis = var_3404_axis_0, split_sizes = tile_36, x = var_3403_cast_fp16)[name = tensor<string, []>("op_3404_cast_fp16")];
+            tensor<int32, [4]> var_3425_perm_0 = const()[name = tensor<string, []>("op_3425_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3426_axis_0 = const()[name = tensor<string, []>("op_3426_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3425_cast_fp16 = transpose(perm = var_3425_perm_0, x = k_25_cast_fp16)[name = tensor<string, []>("transpose_20")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3426_cast_fp16_19 = split(axis = var_3426_axis_0, split_sizes = tile_37, x = var_3425_cast_fp16)[name = tensor<string, []>("op_3426_cast_fp16")];
+            tensor<int32, [20]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3447_axis_0 = const()[name = tensor<string, []>("op_3447_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3447_cast_fp16_19 = split(axis = var_3447_axis_0, split_sizes = tile_38, x = var_3401_cast_fp16)[name = tensor<string, []>("op_3447_cast_fp16")];
+            tensor<string, []> aw_481_equation_0 = const()[name = tensor<string, []>("aw_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_481_cast_fp16 = einsum(equation = aw_481_equation_0, values = (var_3426_cast_fp16_0, var_3404_cast_fp16_0))[name = tensor<string, []>("aw_481_cast_fp16")];
+            tensor<string, []> aw_483_equation_0 = const()[name = tensor<string, []>("aw_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_483_cast_fp16 = einsum(equation = aw_483_equation_0, values = (var_3426_cast_fp16_1, var_3404_cast_fp16_1))[name = tensor<string, []>("aw_483_cast_fp16")];
+            tensor<string, []> aw_485_equation_0 = const()[name = tensor<string, []>("aw_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_485_cast_fp16 = einsum(equation = aw_485_equation_0, values = (var_3426_cast_fp16_2, var_3404_cast_fp16_2))[name = tensor<string, []>("aw_485_cast_fp16")];
+            tensor<string, []> aw_487_equation_0 = const()[name = tensor<string, []>("aw_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_487_cast_fp16 = einsum(equation = aw_487_equation_0, values = (var_3426_cast_fp16_3, var_3404_cast_fp16_3))[name = tensor<string, []>("aw_487_cast_fp16")];
+            tensor<string, []> aw_489_equation_0 = const()[name = tensor<string, []>("aw_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_489_cast_fp16 = einsum(equation = aw_489_equation_0, values = (var_3426_cast_fp16_4, var_3404_cast_fp16_4))[name = tensor<string, []>("aw_489_cast_fp16")];
+            tensor<string, []> aw_491_equation_0 = const()[name = tensor<string, []>("aw_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_491_cast_fp16 = einsum(equation = aw_491_equation_0, values = (var_3426_cast_fp16_5, var_3404_cast_fp16_5))[name = tensor<string, []>("aw_491_cast_fp16")];
+            tensor<string, []> aw_493_equation_0 = const()[name = tensor<string, []>("aw_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_493_cast_fp16 = einsum(equation = aw_493_equation_0, values = (var_3426_cast_fp16_6, var_3404_cast_fp16_6))[name = tensor<string, []>("aw_493_cast_fp16")];
+            tensor<string, []> aw_495_equation_0 = const()[name = tensor<string, []>("aw_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_495_cast_fp16 = einsum(equation = aw_495_equation_0, values = (var_3426_cast_fp16_7, var_3404_cast_fp16_7))[name = tensor<string, []>("aw_495_cast_fp16")];
+            tensor<string, []> aw_497_equation_0 = const()[name = tensor<string, []>("aw_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_497_cast_fp16 = einsum(equation = aw_497_equation_0, values = (var_3426_cast_fp16_8, var_3404_cast_fp16_8))[name = tensor<string, []>("aw_497_cast_fp16")];
+            tensor<string, []> aw_499_equation_0 = const()[name = tensor<string, []>("aw_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_499_cast_fp16 = einsum(equation = aw_499_equation_0, values = (var_3426_cast_fp16_9, var_3404_cast_fp16_9))[name = tensor<string, []>("aw_499_cast_fp16")];
+            tensor<string, []> aw_501_equation_0 = const()[name = tensor<string, []>("aw_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_501_cast_fp16 = einsum(equation = aw_501_equation_0, values = (var_3426_cast_fp16_10, var_3404_cast_fp16_10))[name = tensor<string, []>("aw_501_cast_fp16")];
+            tensor<string, []> aw_503_equation_0 = const()[name = tensor<string, []>("aw_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_503_cast_fp16 = einsum(equation = aw_503_equation_0, values = (var_3426_cast_fp16_11, var_3404_cast_fp16_11))[name = tensor<string, []>("aw_503_cast_fp16")];
+            tensor<string, []> aw_505_equation_0 = const()[name = tensor<string, []>("aw_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_505_cast_fp16 = einsum(equation = aw_505_equation_0, values = (var_3426_cast_fp16_12, var_3404_cast_fp16_12))[name = tensor<string, []>("aw_505_cast_fp16")];
+            tensor<string, []> aw_507_equation_0 = const()[name = tensor<string, []>("aw_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_507_cast_fp16 = einsum(equation = aw_507_equation_0, values = (var_3426_cast_fp16_13, var_3404_cast_fp16_13))[name = tensor<string, []>("aw_507_cast_fp16")];
+            tensor<string, []> aw_509_equation_0 = const()[name = tensor<string, []>("aw_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_509_cast_fp16 = einsum(equation = aw_509_equation_0, values = (var_3426_cast_fp16_14, var_3404_cast_fp16_14))[name = tensor<string, []>("aw_509_cast_fp16")];
+            tensor<string, []> aw_511_equation_0 = const()[name = tensor<string, []>("aw_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_511_cast_fp16 = einsum(equation = aw_511_equation_0, values = (var_3426_cast_fp16_15, var_3404_cast_fp16_15))[name = tensor<string, []>("aw_511_cast_fp16")];
+            tensor<string, []> aw_513_equation_0 = const()[name = tensor<string, []>("aw_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_513_cast_fp16 = einsum(equation = aw_513_equation_0, values = (var_3426_cast_fp16_16, var_3404_cast_fp16_16))[name = tensor<string, []>("aw_513_cast_fp16")];
+            tensor<string, []> aw_515_equation_0 = const()[name = tensor<string, []>("aw_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_515_cast_fp16 = einsum(equation = aw_515_equation_0, values = (var_3426_cast_fp16_17, var_3404_cast_fp16_17))[name = tensor<string, []>("aw_515_cast_fp16")];
+            tensor<string, []> aw_517_equation_0 = const()[name = tensor<string, []>("aw_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_517_cast_fp16 = einsum(equation = aw_517_equation_0, values = (var_3426_cast_fp16_18, var_3404_cast_fp16_18))[name = tensor<string, []>("aw_517_cast_fp16")];
+            tensor<string, []> aw_519_equation_0 = const()[name = tensor<string, []>("aw_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_519_cast_fp16 = einsum(equation = aw_519_equation_0, values = (var_3426_cast_fp16_19, var_3404_cast_fp16_19))[name = tensor<string, []>("aw_519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3508_cast_fp16 = softmax(axis = var_3352, x = aw_481_cast_fp16)[name = tensor<string, []>("op_3508_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3509_cast_fp16 = softmax(axis = var_3352, x = aw_483_cast_fp16)[name = tensor<string, []>("op_3509_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3510_cast_fp16 = softmax(axis = var_3352, x = aw_485_cast_fp16)[name = tensor<string, []>("op_3510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3511_cast_fp16 = softmax(axis = var_3352, x = aw_487_cast_fp16)[name = tensor<string, []>("op_3511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3512_cast_fp16 = softmax(axis = var_3352, x = aw_489_cast_fp16)[name = tensor<string, []>("op_3512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3513_cast_fp16 = softmax(axis = var_3352, x = aw_491_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3514_cast_fp16 = softmax(axis = var_3352, x = aw_493_cast_fp16)[name = tensor<string, []>("op_3514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3515_cast_fp16 = softmax(axis = var_3352, x = aw_495_cast_fp16)[name = tensor<string, []>("op_3515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3516_cast_fp16 = softmax(axis = var_3352, x = aw_497_cast_fp16)[name = tensor<string, []>("op_3516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3517_cast_fp16 = softmax(axis = var_3352, x = aw_499_cast_fp16)[name = tensor<string, []>("op_3517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3518_cast_fp16 = softmax(axis = var_3352, x = aw_501_cast_fp16)[name = tensor<string, []>("op_3518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3519_cast_fp16 = softmax(axis = var_3352, x = aw_503_cast_fp16)[name = tensor<string, []>("op_3519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3520_cast_fp16 = softmax(axis = var_3352, x = aw_505_cast_fp16)[name = tensor<string, []>("op_3520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3521_cast_fp16 = softmax(axis = var_3352, x = aw_507_cast_fp16)[name = tensor<string, []>("op_3521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3522_cast_fp16 = softmax(axis = var_3352, x = aw_509_cast_fp16)[name = tensor<string, []>("op_3522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3523_cast_fp16 = softmax(axis = var_3352, x = aw_511_cast_fp16)[name = tensor<string, []>("op_3523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3524_cast_fp16 = softmax(axis = var_3352, x = aw_513_cast_fp16)[name = tensor<string, []>("op_3524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3525_cast_fp16 = softmax(axis = var_3352, x = aw_515_cast_fp16)[name = tensor<string, []>("op_3525_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3526_cast_fp16 = softmax(axis = var_3352, x = aw_517_cast_fp16)[name = tensor<string, []>("op_3526_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3527_cast_fp16 = softmax(axis = var_3352, x = aw_519_cast_fp16)[name = tensor<string, []>("op_3527_cast_fp16")];
+            tensor<string, []> var_3529_equation_0 = const()[name = tensor<string, []>("op_3529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3529_cast_fp16 = einsum(equation = var_3529_equation_0, values = (var_3447_cast_fp16_0, var_3508_cast_fp16))[name = tensor<string, []>("op_3529_cast_fp16")];
+            tensor<string, []> var_3531_equation_0 = const()[name = tensor<string, []>("op_3531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3531_cast_fp16 = einsum(equation = var_3531_equation_0, values = (var_3447_cast_fp16_1, var_3509_cast_fp16))[name = tensor<string, []>("op_3531_cast_fp16")];
+            tensor<string, []> var_3533_equation_0 = const()[name = tensor<string, []>("op_3533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3533_cast_fp16 = einsum(equation = var_3533_equation_0, values = (var_3447_cast_fp16_2, var_3510_cast_fp16))[name = tensor<string, []>("op_3533_cast_fp16")];
+            tensor<string, []> var_3535_equation_0 = const()[name = tensor<string, []>("op_3535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3535_cast_fp16 = einsum(equation = var_3535_equation_0, values = (var_3447_cast_fp16_3, var_3511_cast_fp16))[name = tensor<string, []>("op_3535_cast_fp16")];
+            tensor<string, []> var_3537_equation_0 = const()[name = tensor<string, []>("op_3537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3537_cast_fp16 = einsum(equation = var_3537_equation_0, values = (var_3447_cast_fp16_4, var_3512_cast_fp16))[name = tensor<string, []>("op_3537_cast_fp16")];
+            tensor<string, []> var_3539_equation_0 = const()[name = tensor<string, []>("op_3539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3539_cast_fp16 = einsum(equation = var_3539_equation_0, values = (var_3447_cast_fp16_5, var_3513_cast_fp16))[name = tensor<string, []>("op_3539_cast_fp16")];
+            tensor<string, []> var_3541_equation_0 = const()[name = tensor<string, []>("op_3541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3541_cast_fp16 = einsum(equation = var_3541_equation_0, values = (var_3447_cast_fp16_6, var_3514_cast_fp16))[name = tensor<string, []>("op_3541_cast_fp16")];
+            tensor<string, []> var_3543_equation_0 = const()[name = tensor<string, []>("op_3543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3543_cast_fp16 = einsum(equation = var_3543_equation_0, values = (var_3447_cast_fp16_7, var_3515_cast_fp16))[name = tensor<string, []>("op_3543_cast_fp16")];
+            tensor<string, []> var_3545_equation_0 = const()[name = tensor<string, []>("op_3545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3545_cast_fp16 = einsum(equation = var_3545_equation_0, values = (var_3447_cast_fp16_8, var_3516_cast_fp16))[name = tensor<string, []>("op_3545_cast_fp16")];
+            tensor<string, []> var_3547_equation_0 = const()[name = tensor<string, []>("op_3547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3547_cast_fp16 = einsum(equation = var_3547_equation_0, values = (var_3447_cast_fp16_9, var_3517_cast_fp16))[name = tensor<string, []>("op_3547_cast_fp16")];
+            tensor<string, []> var_3549_equation_0 = const()[name = tensor<string, []>("op_3549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3549_cast_fp16 = einsum(equation = var_3549_equation_0, values = (var_3447_cast_fp16_10, var_3518_cast_fp16))[name = tensor<string, []>("op_3549_cast_fp16")];
+            tensor<string, []> var_3551_equation_0 = const()[name = tensor<string, []>("op_3551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3551_cast_fp16 = einsum(equation = var_3551_equation_0, values = (var_3447_cast_fp16_11, var_3519_cast_fp16))[name = tensor<string, []>("op_3551_cast_fp16")];
+            tensor<string, []> var_3553_equation_0 = const()[name = tensor<string, []>("op_3553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3553_cast_fp16 = einsum(equation = var_3553_equation_0, values = (var_3447_cast_fp16_12, var_3520_cast_fp16))[name = tensor<string, []>("op_3553_cast_fp16")];
+            tensor<string, []> var_3555_equation_0 = const()[name = tensor<string, []>("op_3555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3555_cast_fp16 = einsum(equation = var_3555_equation_0, values = (var_3447_cast_fp16_13, var_3521_cast_fp16))[name = tensor<string, []>("op_3555_cast_fp16")];
+            tensor<string, []> var_3557_equation_0 = const()[name = tensor<string, []>("op_3557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3557_cast_fp16 = einsum(equation = var_3557_equation_0, values = (var_3447_cast_fp16_14, var_3522_cast_fp16))[name = tensor<string, []>("op_3557_cast_fp16")];
+            tensor<string, []> var_3559_equation_0 = const()[name = tensor<string, []>("op_3559_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3559_cast_fp16 = einsum(equation = var_3559_equation_0, values = (var_3447_cast_fp16_15, var_3523_cast_fp16))[name = tensor<string, []>("op_3559_cast_fp16")];
+            tensor<string, []> var_3561_equation_0 = const()[name = tensor<string, []>("op_3561_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3561_cast_fp16 = einsum(equation = var_3561_equation_0, values = (var_3447_cast_fp16_16, var_3524_cast_fp16))[name = tensor<string, []>("op_3561_cast_fp16")];
+            tensor<string, []> var_3563_equation_0 = const()[name = tensor<string, []>("op_3563_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3563_cast_fp16 = einsum(equation = var_3563_equation_0, values = (var_3447_cast_fp16_17, var_3525_cast_fp16))[name = tensor<string, []>("op_3563_cast_fp16")];
+            tensor<string, []> var_3565_equation_0 = const()[name = tensor<string, []>("op_3565_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3565_cast_fp16 = einsum(equation = var_3565_equation_0, values = (var_3447_cast_fp16_18, var_3526_cast_fp16))[name = tensor<string, []>("op_3565_cast_fp16")];
+            tensor<string, []> var_3567_equation_0 = const()[name = tensor<string, []>("op_3567_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3567_cast_fp16 = einsum(equation = var_3567_equation_0, values = (var_3447_cast_fp16_19, var_3527_cast_fp16))[name = tensor<string, []>("op_3567_cast_fp16")];
+            tensor<bool, []> input_125_interleave_0 = const()[name = tensor<string, []>("input_125_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_125_cast_fp16 = concat(axis = var_3352, interleave = input_125_interleave_0, values = (var_3529_cast_fp16, var_3531_cast_fp16, var_3533_cast_fp16, var_3535_cast_fp16, var_3537_cast_fp16, var_3539_cast_fp16, var_3541_cast_fp16, var_3543_cast_fp16, var_3545_cast_fp16, var_3547_cast_fp16, var_3549_cast_fp16, var_3551_cast_fp16, var_3553_cast_fp16, var_3555_cast_fp16, var_3557_cast_fp16, var_3559_cast_fp16, var_3561_cast_fp16, var_3563_cast_fp16, var_3565_cast_fp16, var_3567_cast_fp16))[name = tensor<string, []>("input_125_cast_fp16")];
+            tensor<string, []> var_3576_pad_type_0 = const()[name = tensor<string, []>("op_3576_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3576_strides_0 = const()[name = tensor<string, []>("op_3576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3576_pad_0 = const()[name = tensor<string, []>("op_3576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3576_dilations_0 = const()[name = tensor<string, []>("op_3576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3576_groups_0 = const()[name = tensor<string, []>("op_3576_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_12_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496739392)))];
+            tensor<fp16, [1280]> blocks_12_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(500016256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3576_cast_fp16 = conv(bias = blocks_12_attn_out_bias_to_fp16, dilations = var_3576_dilations_0, groups = var_3576_groups_0, pad = var_3576_pad_0, pad_type = var_3576_pad_type_0, strides = var_3576_strides_0, weight = blocks_12_attn_out_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = var_3576_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_127_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(500018880)))];
+            tensor<fp16, [1280]> input_127_beta_0_to_fp16 = const()[name = tensor<string, []>("input_127_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(500021504)))];
+            tensor<fp16, []> var_3586_to_fp16 = const()[name = tensor<string, []>("op_3586_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = input_127_beta_0_to_fp16, epsilon = var_3586_to_fp16, gamma = input_127_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
+            tensor<string, []> input_129_pad_type_0 = const()[name = tensor<string, []>("input_129_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_129_strides_0 = const()[name = tensor<string, []>("input_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_129_pad_0 = const()[name = tensor<string, []>("input_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_129_dilations_0 = const()[name = tensor<string, []>("input_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_129_groups_0 = const()[name = tensor<string, []>("input_129_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_12_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(500024128)))];
+            tensor<fp16, [5120]> blocks_12_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513131392)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_129_cast_fp16 = conv(bias = blocks_12_mlp_0_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = blocks_12_mlp_0_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
+            tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<string, []> var_3612_pad_type_0 = const()[name = tensor<string, []>("op_3612_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3612_strides_0 = const()[name = tensor<string, []>("op_3612_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3612_pad_0 = const()[name = tensor<string, []>("op_3612_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3612_dilations_0 = const()[name = tensor<string, []>("op_3612_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3612_groups_0 = const()[name = tensor<string, []>("op_3612_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_12_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513141696)))];
+            tensor<fp16, [1280]> blocks_12_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526248960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3612_cast_fp16 = conv(bias = blocks_12_mlp_2_bias_to_fp16, dilations = var_3612_dilations_0, groups = var_3612_groups_0, pad = var_3612_pad_0, pad_type = var_3612_pad_type_0, strides = var_3612_strides_0, weight = blocks_12_mlp_2_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("op_3612_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = var_3612_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, []> var_3621 = const()[name = tensor<string, []>("op_3621"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = tensor<string, []>("input_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_133_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_133_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526251584)))];
+            tensor<fp16, [1280]> input_133_beta_0_to_fp16 = const()[name = tensor<string, []>("input_133_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526254208)))];
+            tensor<fp16, []> var_3637_to_fp16 = const()[name = tensor<string, []>("op_3637_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = input_133_beta_0_to_fp16, epsilon = var_3637_to_fp16, gamma = input_133_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<string, []> q_27_pad_type_0 = const()[name = tensor<string, []>("q_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_27_strides_0 = const()[name = tensor<string, []>("q_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_27_pad_0 = const()[name = tensor<string, []>("q_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_27_dilations_0 = const()[name = tensor<string, []>("q_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_27_groups_0 = const()[name = tensor<string, []>("q_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3672_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3672_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526256832)))];
+            tensor<fp16, [1280]> var_3672_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3672_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529533696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3672_cast_fp16 = conv(bias = var_3672_bias_0_to_fp16, dilations = q_27_dilations_0, groups = q_27_groups_0, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = q_27_strides_0, weight = var_3672_weight_0_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3672_cast_fp16")];
+            tensor<string, []> k_27_pad_type_0 = const()[name = tensor<string, []>("k_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_27_strides_0 = const()[name = tensor<string, []>("k_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_27_pad_0 = const()[name = tensor<string, []>("k_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_27_dilations_0 = const()[name = tensor<string, []>("k_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_27_groups_0 = const()[name = tensor<string, []>("k_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(529536320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_27_cast_fp16 = conv(dilations = k_27_dilations_0, groups = k_27_groups_0, pad = k_27_pad_0, pad_type = k_27_pad_type_0, strides = k_27_strides_0, weight = blocks_13_attn_key_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("k_27_cast_fp16")];
+            tensor<string, []> var_3670_pad_type_0 = const()[name = tensor<string, []>("op_3670_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3670_strides_0 = const()[name = tensor<string, []>("op_3670_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3670_pad_0 = const()[name = tensor<string, []>("op_3670_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3670_dilations_0 = const()[name = tensor<string, []>("op_3670_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3670_groups_0 = const()[name = tensor<string, []>("op_3670_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(532813184)))];
+            tensor<fp16, [1280]> blocks_13_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(536090048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3670_cast_fp16 = conv(bias = blocks_13_attn_value_bias_to_fp16, dilations = var_3670_dilations_0, groups = var_3670_groups_0, pad = var_3670_pad_0, pad_type = var_3670_pad_type_0, strides = var_3670_strides_0, weight = blocks_13_attn_value_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3670_cast_fp16")];
+            tensor<int32, [20]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3673_axis_0 = const()[name = tensor<string, []>("op_3673_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3673_cast_fp16_19 = split(axis = var_3673_axis_0, split_sizes = tile_39, x = var_3672_cast_fp16)[name = tensor<string, []>("op_3673_cast_fp16")];
+            tensor<int32, [4]> var_3694_perm_0 = const()[name = tensor<string, []>("op_3694_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_40 = const()[name = tensor<string, []>("tile_40"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3695_axis_0 = const()[name = tensor<string, []>("op_3695_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3694_cast_fp16 = transpose(perm = var_3694_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_19")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3695_cast_fp16_19 = split(axis = var_3695_axis_0, split_sizes = tile_40, x = var_3694_cast_fp16)[name = tensor<string, []>("op_3695_cast_fp16")];
+            tensor<int32, [20]> tile_41 = const()[name = tensor<string, []>("tile_41"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3716_axis_0 = const()[name = tensor<string, []>("op_3716_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3716_cast_fp16_19 = split(axis = var_3716_axis_0, split_sizes = tile_41, x = var_3670_cast_fp16)[name = tensor<string, []>("op_3716_cast_fp16")];
+            tensor<string, []> aw_521_equation_0 = const()[name = tensor<string, []>("aw_521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_521_cast_fp16 = einsum(equation = aw_521_equation_0, values = (var_3695_cast_fp16_0, var_3673_cast_fp16_0))[name = tensor<string, []>("aw_521_cast_fp16")];
+            tensor<string, []> aw_523_equation_0 = const()[name = tensor<string, []>("aw_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_523_cast_fp16 = einsum(equation = aw_523_equation_0, values = (var_3695_cast_fp16_1, var_3673_cast_fp16_1))[name = tensor<string, []>("aw_523_cast_fp16")];
+            tensor<string, []> aw_525_equation_0 = const()[name = tensor<string, []>("aw_525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_525_cast_fp16 = einsum(equation = aw_525_equation_0, values = (var_3695_cast_fp16_2, var_3673_cast_fp16_2))[name = tensor<string, []>("aw_525_cast_fp16")];
+            tensor<string, []> aw_527_equation_0 = const()[name = tensor<string, []>("aw_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_527_cast_fp16 = einsum(equation = aw_527_equation_0, values = (var_3695_cast_fp16_3, var_3673_cast_fp16_3))[name = tensor<string, []>("aw_527_cast_fp16")];
+            tensor<string, []> aw_529_equation_0 = const()[name = tensor<string, []>("aw_529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_529_cast_fp16 = einsum(equation = aw_529_equation_0, values = (var_3695_cast_fp16_4, var_3673_cast_fp16_4))[name = tensor<string, []>("aw_529_cast_fp16")];
+            tensor<string, []> aw_531_equation_0 = const()[name = tensor<string, []>("aw_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_531_cast_fp16 = einsum(equation = aw_531_equation_0, values = (var_3695_cast_fp16_5, var_3673_cast_fp16_5))[name = tensor<string, []>("aw_531_cast_fp16")];
+            tensor<string, []> aw_533_equation_0 = const()[name = tensor<string, []>("aw_533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_533_cast_fp16 = einsum(equation = aw_533_equation_0, values = (var_3695_cast_fp16_6, var_3673_cast_fp16_6))[name = tensor<string, []>("aw_533_cast_fp16")];
+            tensor<string, []> aw_535_equation_0 = const()[name = tensor<string, []>("aw_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_535_cast_fp16 = einsum(equation = aw_535_equation_0, values = (var_3695_cast_fp16_7, var_3673_cast_fp16_7))[name = tensor<string, []>("aw_535_cast_fp16")];
+            tensor<string, []> aw_537_equation_0 = const()[name = tensor<string, []>("aw_537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_537_cast_fp16 = einsum(equation = aw_537_equation_0, values = (var_3695_cast_fp16_8, var_3673_cast_fp16_8))[name = tensor<string, []>("aw_537_cast_fp16")];
+            tensor<string, []> aw_539_equation_0 = const()[name = tensor<string, []>("aw_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_539_cast_fp16 = einsum(equation = aw_539_equation_0, values = (var_3695_cast_fp16_9, var_3673_cast_fp16_9))[name = tensor<string, []>("aw_539_cast_fp16")];
+            tensor<string, []> aw_541_equation_0 = const()[name = tensor<string, []>("aw_541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_541_cast_fp16 = einsum(equation = aw_541_equation_0, values = (var_3695_cast_fp16_10, var_3673_cast_fp16_10))[name = tensor<string, []>("aw_541_cast_fp16")];
+            tensor<string, []> aw_543_equation_0 = const()[name = tensor<string, []>("aw_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_543_cast_fp16 = einsum(equation = aw_543_equation_0, values = (var_3695_cast_fp16_11, var_3673_cast_fp16_11))[name = tensor<string, []>("aw_543_cast_fp16")];
+            tensor<string, []> aw_545_equation_0 = const()[name = tensor<string, []>("aw_545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_545_cast_fp16 = einsum(equation = aw_545_equation_0, values = (var_3695_cast_fp16_12, var_3673_cast_fp16_12))[name = tensor<string, []>("aw_545_cast_fp16")];
+            tensor<string, []> aw_547_equation_0 = const()[name = tensor<string, []>("aw_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_547_cast_fp16 = einsum(equation = aw_547_equation_0, values = (var_3695_cast_fp16_13, var_3673_cast_fp16_13))[name = tensor<string, []>("aw_547_cast_fp16")];
+            tensor<string, []> aw_549_equation_0 = const()[name = tensor<string, []>("aw_549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_549_cast_fp16 = einsum(equation = aw_549_equation_0, values = (var_3695_cast_fp16_14, var_3673_cast_fp16_14))[name = tensor<string, []>("aw_549_cast_fp16")];
+            tensor<string, []> aw_551_equation_0 = const()[name = tensor<string, []>("aw_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_551_cast_fp16 = einsum(equation = aw_551_equation_0, values = (var_3695_cast_fp16_15, var_3673_cast_fp16_15))[name = tensor<string, []>("aw_551_cast_fp16")];
+            tensor<string, []> aw_553_equation_0 = const()[name = tensor<string, []>("aw_553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_553_cast_fp16 = einsum(equation = aw_553_equation_0, values = (var_3695_cast_fp16_16, var_3673_cast_fp16_16))[name = tensor<string, []>("aw_553_cast_fp16")];
+            tensor<string, []> aw_555_equation_0 = const()[name = tensor<string, []>("aw_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_555_cast_fp16 = einsum(equation = aw_555_equation_0, values = (var_3695_cast_fp16_17, var_3673_cast_fp16_17))[name = tensor<string, []>("aw_555_cast_fp16")];
+            tensor<string, []> aw_557_equation_0 = const()[name = tensor<string, []>("aw_557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_557_cast_fp16 = einsum(equation = aw_557_equation_0, values = (var_3695_cast_fp16_18, var_3673_cast_fp16_18))[name = tensor<string, []>("aw_557_cast_fp16")];
+            tensor<string, []> aw_559_equation_0 = const()[name = tensor<string, []>("aw_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_559_cast_fp16 = einsum(equation = aw_559_equation_0, values = (var_3695_cast_fp16_19, var_3673_cast_fp16_19))[name = tensor<string, []>("aw_559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3777_cast_fp16 = softmax(axis = var_3621, x = aw_521_cast_fp16)[name = tensor<string, []>("op_3777_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3778_cast_fp16 = softmax(axis = var_3621, x = aw_523_cast_fp16)[name = tensor<string, []>("op_3778_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3779_cast_fp16 = softmax(axis = var_3621, x = aw_525_cast_fp16)[name = tensor<string, []>("op_3779_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3780_cast_fp16 = softmax(axis = var_3621, x = aw_527_cast_fp16)[name = tensor<string, []>("op_3780_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3781_cast_fp16 = softmax(axis = var_3621, x = aw_529_cast_fp16)[name = tensor<string, []>("op_3781_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3782_cast_fp16 = softmax(axis = var_3621, x = aw_531_cast_fp16)[name = tensor<string, []>("op_3782_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3783_cast_fp16 = softmax(axis = var_3621, x = aw_533_cast_fp16)[name = tensor<string, []>("op_3783_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3784_cast_fp16 = softmax(axis = var_3621, x = aw_535_cast_fp16)[name = tensor<string, []>("op_3784_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3785_cast_fp16 = softmax(axis = var_3621, x = aw_537_cast_fp16)[name = tensor<string, []>("op_3785_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3786_cast_fp16 = softmax(axis = var_3621, x = aw_539_cast_fp16)[name = tensor<string, []>("op_3786_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3787_cast_fp16 = softmax(axis = var_3621, x = aw_541_cast_fp16)[name = tensor<string, []>("op_3787_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3788_cast_fp16 = softmax(axis = var_3621, x = aw_543_cast_fp16)[name = tensor<string, []>("op_3788_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3789_cast_fp16 = softmax(axis = var_3621, x = aw_545_cast_fp16)[name = tensor<string, []>("op_3789_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3790_cast_fp16 = softmax(axis = var_3621, x = aw_547_cast_fp16)[name = tensor<string, []>("op_3790_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3791_cast_fp16 = softmax(axis = var_3621, x = aw_549_cast_fp16)[name = tensor<string, []>("op_3791_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3792_cast_fp16 = softmax(axis = var_3621, x = aw_551_cast_fp16)[name = tensor<string, []>("op_3792_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3793_cast_fp16 = softmax(axis = var_3621, x = aw_553_cast_fp16)[name = tensor<string, []>("op_3793_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3794_cast_fp16 = softmax(axis = var_3621, x = aw_555_cast_fp16)[name = tensor<string, []>("op_3794_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3795_cast_fp16 = softmax(axis = var_3621, x = aw_557_cast_fp16)[name = tensor<string, []>("op_3795_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3796_cast_fp16 = softmax(axis = var_3621, x = aw_559_cast_fp16)[name = tensor<string, []>("op_3796_cast_fp16")];
+            tensor<string, []> var_3798_equation_0 = const()[name = tensor<string, []>("op_3798_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3798_cast_fp16 = einsum(equation = var_3798_equation_0, values = (var_3716_cast_fp16_0, var_3777_cast_fp16))[name = tensor<string, []>("op_3798_cast_fp16")];
+            tensor<string, []> var_3800_equation_0 = const()[name = tensor<string, []>("op_3800_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3800_cast_fp16 = einsum(equation = var_3800_equation_0, values = (var_3716_cast_fp16_1, var_3778_cast_fp16))[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<string, []> var_3802_equation_0 = const()[name = tensor<string, []>("op_3802_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3802_cast_fp16 = einsum(equation = var_3802_equation_0, values = (var_3716_cast_fp16_2, var_3779_cast_fp16))[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<string, []> var_3804_equation_0 = const()[name = tensor<string, []>("op_3804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3804_cast_fp16 = einsum(equation = var_3804_equation_0, values = (var_3716_cast_fp16_3, var_3780_cast_fp16))[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<string, []> var_3806_equation_0 = const()[name = tensor<string, []>("op_3806_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3806_cast_fp16 = einsum(equation = var_3806_equation_0, values = (var_3716_cast_fp16_4, var_3781_cast_fp16))[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<string, []> var_3808_equation_0 = const()[name = tensor<string, []>("op_3808_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3808_cast_fp16 = einsum(equation = var_3808_equation_0, values = (var_3716_cast_fp16_5, var_3782_cast_fp16))[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<string, []> var_3810_equation_0 = const()[name = tensor<string, []>("op_3810_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3810_cast_fp16 = einsum(equation = var_3810_equation_0, values = (var_3716_cast_fp16_6, var_3783_cast_fp16))[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<string, []> var_3812_equation_0 = const()[name = tensor<string, []>("op_3812_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3812_cast_fp16 = einsum(equation = var_3812_equation_0, values = (var_3716_cast_fp16_7, var_3784_cast_fp16))[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<string, []> var_3814_equation_0 = const()[name = tensor<string, []>("op_3814_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3814_cast_fp16 = einsum(equation = var_3814_equation_0, values = (var_3716_cast_fp16_8, var_3785_cast_fp16))[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3716_cast_fp16_9, var_3786_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3716_cast_fp16_10, var_3787_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3716_cast_fp16_11, var_3788_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3716_cast_fp16_12, var_3789_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3716_cast_fp16_13, var_3790_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3716_cast_fp16_14, var_3791_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3716_cast_fp16_15, var_3792_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3716_cast_fp16_16, var_3793_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3716_cast_fp16_17, var_3794_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3716_cast_fp16_18, var_3795_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3716_cast_fp16_19, var_3796_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<bool, []> input_135_interleave_0 = const()[name = tensor<string, []>("input_135_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_135_cast_fp16 = concat(axis = var_3621, interleave = input_135_interleave_0, values = (var_3798_cast_fp16, var_3800_cast_fp16, var_3802_cast_fp16, var_3804_cast_fp16, var_3806_cast_fp16, var_3808_cast_fp16, var_3810_cast_fp16, var_3812_cast_fp16, var_3814_cast_fp16, var_3816_cast_fp16, var_3818_cast_fp16, var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16, var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16, var_3836_cast_fp16))[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<string, []> var_3845_pad_type_0 = const()[name = tensor<string, []>("op_3845_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3845_strides_0 = const()[name = tensor<string, []>("op_3845_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3845_pad_0 = const()[name = tensor<string, []>("op_3845_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3845_dilations_0 = const()[name = tensor<string, []>("op_3845_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3845_groups_0 = const()[name = tensor<string, []>("op_3845_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_13_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(536092672)))];
+            tensor<fp16, [1280]> blocks_13_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539369536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3845_cast_fp16 = conv(bias = blocks_13_attn_out_bias_to_fp16, dilations = var_3845_dilations_0, groups = var_3845_groups_0, pad = var_3845_pad_0, pad_type = var_3845_pad_type_0, strides = var_3845_strides_0, weight = blocks_13_attn_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("op_3845_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = var_3845_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_137_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_137_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539372160)))];
+            tensor<fp16, [1280]> input_137_beta_0_to_fp16 = const()[name = tensor<string, []>("input_137_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539374784)))];
+            tensor<fp16, []> var_3855_to_fp16 = const()[name = tensor<string, []>("op_3855_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_3855_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_13_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(539377408)))];
+            tensor<fp16, [5120]> blocks_13_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552484672)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_139_cast_fp16 = conv(bias = blocks_13_mlp_0_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = blocks_13_mlp_0_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<string, []> var_3881_pad_type_0 = const()[name = tensor<string, []>("op_3881_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3881_strides_0 = const()[name = tensor<string, []>("op_3881_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3881_pad_0 = const()[name = tensor<string, []>("op_3881_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3881_dilations_0 = const()[name = tensor<string, []>("op_3881_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3881_groups_0 = const()[name = tensor<string, []>("op_3881_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_13_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(552494976)))];
+            tensor<fp16, [1280]> blocks_13_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565602240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3881_cast_fp16 = conv(bias = blocks_13_mlp_2_bias_to_fp16, dilations = var_3881_dilations_0, groups = var_3881_groups_0, pad = var_3881_pad_0, pad_type = var_3881_pad_type_0, strides = var_3881_strides_0, weight = blocks_13_mlp_2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("op_3881_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_3881_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, []> var_3890 = const()[name = tensor<string, []>("op_3890"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_143_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_143_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565604864)))];
+            tensor<fp16, [1280]> input_143_beta_0_to_fp16 = const()[name = tensor<string, []>("input_143_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565607488)))];
+            tensor<fp16, []> var_3906_to_fp16 = const()[name = tensor<string, []>("op_3906_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = input_143_beta_0_to_fp16, epsilon = var_3906_to_fp16, gamma = input_143_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<string, []> q_29_pad_type_0 = const()[name = tensor<string, []>("q_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_29_strides_0 = const()[name = tensor<string, []>("q_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_29_pad_0 = const()[name = tensor<string, []>("q_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_29_dilations_0 = const()[name = tensor<string, []>("q_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_29_groups_0 = const()[name = tensor<string, []>("q_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_3941_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3941_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(565610112)))];
+            tensor<fp16, [1280]> var_3941_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3941_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568886976)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3941_cast_fp16 = conv(bias = var_3941_bias_0_to_fp16, dilations = q_29_dilations_0, groups = q_29_groups_0, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = q_29_strides_0, weight = var_3941_weight_0_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3941_cast_fp16")];
+            tensor<string, []> k_29_pad_type_0 = const()[name = tensor<string, []>("k_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_29_strides_0 = const()[name = tensor<string, []>("k_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_29_pad_0 = const()[name = tensor<string, []>("k_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_29_dilations_0 = const()[name = tensor<string, []>("k_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_29_groups_0 = const()[name = tensor<string, []>("k_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568889600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_29_cast_fp16 = conv(dilations = k_29_dilations_0, groups = k_29_groups_0, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = k_29_strides_0, weight = blocks_14_attn_key_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
+            tensor<string, []> var_3939_pad_type_0 = const()[name = tensor<string, []>("op_3939_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3939_strides_0 = const()[name = tensor<string, []>("op_3939_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3939_pad_0 = const()[name = tensor<string, []>("op_3939_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3939_dilations_0 = const()[name = tensor<string, []>("op_3939_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3939_groups_0 = const()[name = tensor<string, []>("op_3939_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572166464)))];
+            tensor<fp16, [1280]> blocks_14_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575443328)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3939_cast_fp16 = conv(bias = blocks_14_attn_value_bias_to_fp16, dilations = var_3939_dilations_0, groups = var_3939_groups_0, pad = var_3939_pad_0, pad_type = var_3939_pad_type_0, strides = var_3939_strides_0, weight = blocks_14_attn_value_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3939_cast_fp16")];
+            tensor<int32, [20]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3942_axis_0 = const()[name = tensor<string, []>("op_3942_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3942_cast_fp16_19 = split(axis = var_3942_axis_0, split_sizes = tile_42, x = var_3941_cast_fp16)[name = tensor<string, []>("op_3942_cast_fp16")];
+            tensor<int32, [4]> var_3963_perm_0 = const()[name = tensor<string, []>("op_3963_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3964_axis_0 = const()[name = tensor<string, []>("op_3964_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_3963_cast_fp16 = transpose(perm = var_3963_perm_0, x = k_29_cast_fp16)[name = tensor<string, []>("transpose_18")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_3964_cast_fp16_19 = split(axis = var_3964_axis_0, split_sizes = tile_43, x = var_3963_cast_fp16)[name = tensor<string, []>("op_3964_cast_fp16")];
+            tensor<int32, [20]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3985_axis_0 = const()[name = tensor<string, []>("op_3985_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_3985_cast_fp16_19 = split(axis = var_3985_axis_0, split_sizes = tile_44, x = var_3939_cast_fp16)[name = tensor<string, []>("op_3985_cast_fp16")];
+            tensor<string, []> aw_561_equation_0 = const()[name = tensor<string, []>("aw_561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_561_cast_fp16 = einsum(equation = aw_561_equation_0, values = (var_3964_cast_fp16_0, var_3942_cast_fp16_0))[name = tensor<string, []>("aw_561_cast_fp16")];
+            tensor<string, []> aw_563_equation_0 = const()[name = tensor<string, []>("aw_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_563_cast_fp16 = einsum(equation = aw_563_equation_0, values = (var_3964_cast_fp16_1, var_3942_cast_fp16_1))[name = tensor<string, []>("aw_563_cast_fp16")];
+            tensor<string, []> aw_565_equation_0 = const()[name = tensor<string, []>("aw_565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_565_cast_fp16 = einsum(equation = aw_565_equation_0, values = (var_3964_cast_fp16_2, var_3942_cast_fp16_2))[name = tensor<string, []>("aw_565_cast_fp16")];
+            tensor<string, []> aw_567_equation_0 = const()[name = tensor<string, []>("aw_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_567_cast_fp16 = einsum(equation = aw_567_equation_0, values = (var_3964_cast_fp16_3, var_3942_cast_fp16_3))[name = tensor<string, []>("aw_567_cast_fp16")];
+            tensor<string, []> aw_569_equation_0 = const()[name = tensor<string, []>("aw_569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_569_cast_fp16 = einsum(equation = aw_569_equation_0, values = (var_3964_cast_fp16_4, var_3942_cast_fp16_4))[name = tensor<string, []>("aw_569_cast_fp16")];
+            tensor<string, []> aw_571_equation_0 = const()[name = tensor<string, []>("aw_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_571_cast_fp16 = einsum(equation = aw_571_equation_0, values = (var_3964_cast_fp16_5, var_3942_cast_fp16_5))[name = tensor<string, []>("aw_571_cast_fp16")];
+            tensor<string, []> aw_573_equation_0 = const()[name = tensor<string, []>("aw_573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_573_cast_fp16 = einsum(equation = aw_573_equation_0, values = (var_3964_cast_fp16_6, var_3942_cast_fp16_6))[name = tensor<string, []>("aw_573_cast_fp16")];
+            tensor<string, []> aw_575_equation_0 = const()[name = tensor<string, []>("aw_575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_575_cast_fp16 = einsum(equation = aw_575_equation_0, values = (var_3964_cast_fp16_7, var_3942_cast_fp16_7))[name = tensor<string, []>("aw_575_cast_fp16")];
+            tensor<string, []> aw_577_equation_0 = const()[name = tensor<string, []>("aw_577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_577_cast_fp16 = einsum(equation = aw_577_equation_0, values = (var_3964_cast_fp16_8, var_3942_cast_fp16_8))[name = tensor<string, []>("aw_577_cast_fp16")];
+            tensor<string, []> aw_579_equation_0 = const()[name = tensor<string, []>("aw_579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_579_cast_fp16 = einsum(equation = aw_579_equation_0, values = (var_3964_cast_fp16_9, var_3942_cast_fp16_9))[name = tensor<string, []>("aw_579_cast_fp16")];
+            tensor<string, []> aw_581_equation_0 = const()[name = tensor<string, []>("aw_581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_581_cast_fp16 = einsum(equation = aw_581_equation_0, values = (var_3964_cast_fp16_10, var_3942_cast_fp16_10))[name = tensor<string, []>("aw_581_cast_fp16")];
+            tensor<string, []> aw_583_equation_0 = const()[name = tensor<string, []>("aw_583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_583_cast_fp16 = einsum(equation = aw_583_equation_0, values = (var_3964_cast_fp16_11, var_3942_cast_fp16_11))[name = tensor<string, []>("aw_583_cast_fp16")];
+            tensor<string, []> aw_585_equation_0 = const()[name = tensor<string, []>("aw_585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_585_cast_fp16 = einsum(equation = aw_585_equation_0, values = (var_3964_cast_fp16_12, var_3942_cast_fp16_12))[name = tensor<string, []>("aw_585_cast_fp16")];
+            tensor<string, []> aw_587_equation_0 = const()[name = tensor<string, []>("aw_587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_587_cast_fp16 = einsum(equation = aw_587_equation_0, values = (var_3964_cast_fp16_13, var_3942_cast_fp16_13))[name = tensor<string, []>("aw_587_cast_fp16")];
+            tensor<string, []> aw_589_equation_0 = const()[name = tensor<string, []>("aw_589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_589_cast_fp16 = einsum(equation = aw_589_equation_0, values = (var_3964_cast_fp16_14, var_3942_cast_fp16_14))[name = tensor<string, []>("aw_589_cast_fp16")];
+            tensor<string, []> aw_591_equation_0 = const()[name = tensor<string, []>("aw_591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_591_cast_fp16 = einsum(equation = aw_591_equation_0, values = (var_3964_cast_fp16_15, var_3942_cast_fp16_15))[name = tensor<string, []>("aw_591_cast_fp16")];
+            tensor<string, []> aw_593_equation_0 = const()[name = tensor<string, []>("aw_593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_593_cast_fp16 = einsum(equation = aw_593_equation_0, values = (var_3964_cast_fp16_16, var_3942_cast_fp16_16))[name = tensor<string, []>("aw_593_cast_fp16")];
+            tensor<string, []> aw_595_equation_0 = const()[name = tensor<string, []>("aw_595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_595_cast_fp16 = einsum(equation = aw_595_equation_0, values = (var_3964_cast_fp16_17, var_3942_cast_fp16_17))[name = tensor<string, []>("aw_595_cast_fp16")];
+            tensor<string, []> aw_597_equation_0 = const()[name = tensor<string, []>("aw_597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_597_cast_fp16 = einsum(equation = aw_597_equation_0, values = (var_3964_cast_fp16_18, var_3942_cast_fp16_18))[name = tensor<string, []>("aw_597_cast_fp16")];
+            tensor<string, []> aw_599_equation_0 = const()[name = tensor<string, []>("aw_599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_599_cast_fp16 = einsum(equation = aw_599_equation_0, values = (var_3964_cast_fp16_19, var_3942_cast_fp16_19))[name = tensor<string, []>("aw_599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4046_cast_fp16 = softmax(axis = var_3890, x = aw_561_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4047_cast_fp16 = softmax(axis = var_3890, x = aw_563_cast_fp16)[name = tensor<string, []>("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4048_cast_fp16 = softmax(axis = var_3890, x = aw_565_cast_fp16)[name = tensor<string, []>("op_4048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4049_cast_fp16 = softmax(axis = var_3890, x = aw_567_cast_fp16)[name = tensor<string, []>("op_4049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4050_cast_fp16 = softmax(axis = var_3890, x = aw_569_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4051_cast_fp16 = softmax(axis = var_3890, x = aw_571_cast_fp16)[name = tensor<string, []>("op_4051_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4052_cast_fp16 = softmax(axis = var_3890, x = aw_573_cast_fp16)[name = tensor<string, []>("op_4052_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4053_cast_fp16 = softmax(axis = var_3890, x = aw_575_cast_fp16)[name = tensor<string, []>("op_4053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4054_cast_fp16 = softmax(axis = var_3890, x = aw_577_cast_fp16)[name = tensor<string, []>("op_4054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4055_cast_fp16 = softmax(axis = var_3890, x = aw_579_cast_fp16)[name = tensor<string, []>("op_4055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4056_cast_fp16 = softmax(axis = var_3890, x = aw_581_cast_fp16)[name = tensor<string, []>("op_4056_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4057_cast_fp16 = softmax(axis = var_3890, x = aw_583_cast_fp16)[name = tensor<string, []>("op_4057_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4058_cast_fp16 = softmax(axis = var_3890, x = aw_585_cast_fp16)[name = tensor<string, []>("op_4058_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4059_cast_fp16 = softmax(axis = var_3890, x = aw_587_cast_fp16)[name = tensor<string, []>("op_4059_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4060_cast_fp16 = softmax(axis = var_3890, x = aw_589_cast_fp16)[name = tensor<string, []>("op_4060_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4061_cast_fp16 = softmax(axis = var_3890, x = aw_591_cast_fp16)[name = tensor<string, []>("op_4061_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4062_cast_fp16 = softmax(axis = var_3890, x = aw_593_cast_fp16)[name = tensor<string, []>("op_4062_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4063_cast_fp16 = softmax(axis = var_3890, x = aw_595_cast_fp16)[name = tensor<string, []>("op_4063_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4064_cast_fp16 = softmax(axis = var_3890, x = aw_597_cast_fp16)[name = tensor<string, []>("op_4064_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4065_cast_fp16 = softmax(axis = var_3890, x = aw_599_cast_fp16)[name = tensor<string, []>("op_4065_cast_fp16")];
+            tensor<string, []> var_4067_equation_0 = const()[name = tensor<string, []>("op_4067_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3985_cast_fp16_0, var_4046_cast_fp16))[name = tensor<string, []>("op_4067_cast_fp16")];
+            tensor<string, []> var_4069_equation_0 = const()[name = tensor<string, []>("op_4069_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4069_cast_fp16 = einsum(equation = var_4069_equation_0, values = (var_3985_cast_fp16_1, var_4047_cast_fp16))[name = tensor<string, []>("op_4069_cast_fp16")];
+            tensor<string, []> var_4071_equation_0 = const()[name = tensor<string, []>("op_4071_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_3985_cast_fp16_2, var_4048_cast_fp16))[name = tensor<string, []>("op_4071_cast_fp16")];
+            tensor<string, []> var_4073_equation_0 = const()[name = tensor<string, []>("op_4073_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4073_cast_fp16 = einsum(equation = var_4073_equation_0, values = (var_3985_cast_fp16_3, var_4049_cast_fp16))[name = tensor<string, []>("op_4073_cast_fp16")];
+            tensor<string, []> var_4075_equation_0 = const()[name = tensor<string, []>("op_4075_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_3985_cast_fp16_4, var_4050_cast_fp16))[name = tensor<string, []>("op_4075_cast_fp16")];
+            tensor<string, []> var_4077_equation_0 = const()[name = tensor<string, []>("op_4077_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4077_cast_fp16 = einsum(equation = var_4077_equation_0, values = (var_3985_cast_fp16_5, var_4051_cast_fp16))[name = tensor<string, []>("op_4077_cast_fp16")];
+            tensor<string, []> var_4079_equation_0 = const()[name = tensor<string, []>("op_4079_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_3985_cast_fp16_6, var_4052_cast_fp16))[name = tensor<string, []>("op_4079_cast_fp16")];
+            tensor<string, []> var_4081_equation_0 = const()[name = tensor<string, []>("op_4081_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4081_cast_fp16 = einsum(equation = var_4081_equation_0, values = (var_3985_cast_fp16_7, var_4053_cast_fp16))[name = tensor<string, []>("op_4081_cast_fp16")];
+            tensor<string, []> var_4083_equation_0 = const()[name = tensor<string, []>("op_4083_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_3985_cast_fp16_8, var_4054_cast_fp16))[name = tensor<string, []>("op_4083_cast_fp16")];
+            tensor<string, []> var_4085_equation_0 = const()[name = tensor<string, []>("op_4085_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4085_cast_fp16 = einsum(equation = var_4085_equation_0, values = (var_3985_cast_fp16_9, var_4055_cast_fp16))[name = tensor<string, []>("op_4085_cast_fp16")];
+            tensor<string, []> var_4087_equation_0 = const()[name = tensor<string, []>("op_4087_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4087_cast_fp16 = einsum(equation = var_4087_equation_0, values = (var_3985_cast_fp16_10, var_4056_cast_fp16))[name = tensor<string, []>("op_4087_cast_fp16")];
+            tensor<string, []> var_4089_equation_0 = const()[name = tensor<string, []>("op_4089_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4089_cast_fp16 = einsum(equation = var_4089_equation_0, values = (var_3985_cast_fp16_11, var_4057_cast_fp16))[name = tensor<string, []>("op_4089_cast_fp16")];
+            tensor<string, []> var_4091_equation_0 = const()[name = tensor<string, []>("op_4091_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4091_cast_fp16 = einsum(equation = var_4091_equation_0, values = (var_3985_cast_fp16_12, var_4058_cast_fp16))[name = tensor<string, []>("op_4091_cast_fp16")];
+            tensor<string, []> var_4093_equation_0 = const()[name = tensor<string, []>("op_4093_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4093_cast_fp16 = einsum(equation = var_4093_equation_0, values = (var_3985_cast_fp16_13, var_4059_cast_fp16))[name = tensor<string, []>("op_4093_cast_fp16")];
+            tensor<string, []> var_4095_equation_0 = const()[name = tensor<string, []>("op_4095_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4095_cast_fp16 = einsum(equation = var_4095_equation_0, values = (var_3985_cast_fp16_14, var_4060_cast_fp16))[name = tensor<string, []>("op_4095_cast_fp16")];
+            tensor<string, []> var_4097_equation_0 = const()[name = tensor<string, []>("op_4097_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4097_cast_fp16 = einsum(equation = var_4097_equation_0, values = (var_3985_cast_fp16_15, var_4061_cast_fp16))[name = tensor<string, []>("op_4097_cast_fp16")];
+            tensor<string, []> var_4099_equation_0 = const()[name = tensor<string, []>("op_4099_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4099_cast_fp16 = einsum(equation = var_4099_equation_0, values = (var_3985_cast_fp16_16, var_4062_cast_fp16))[name = tensor<string, []>("op_4099_cast_fp16")];
+            tensor<string, []> var_4101_equation_0 = const()[name = tensor<string, []>("op_4101_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4101_cast_fp16 = einsum(equation = var_4101_equation_0, values = (var_3985_cast_fp16_17, var_4063_cast_fp16))[name = tensor<string, []>("op_4101_cast_fp16")];
+            tensor<string, []> var_4103_equation_0 = const()[name = tensor<string, []>("op_4103_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4103_cast_fp16 = einsum(equation = var_4103_equation_0, values = (var_3985_cast_fp16_18, var_4064_cast_fp16))[name = tensor<string, []>("op_4103_cast_fp16")];
+            tensor<string, []> var_4105_equation_0 = const()[name = tensor<string, []>("op_4105_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4105_cast_fp16 = einsum(equation = var_4105_equation_0, values = (var_3985_cast_fp16_19, var_4065_cast_fp16))[name = tensor<string, []>("op_4105_cast_fp16")];
+            tensor<bool, []> input_145_interleave_0 = const()[name = tensor<string, []>("input_145_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_145_cast_fp16 = concat(axis = var_3890, interleave = input_145_interleave_0, values = (var_4067_cast_fp16, var_4069_cast_fp16, var_4071_cast_fp16, var_4073_cast_fp16, var_4075_cast_fp16, var_4077_cast_fp16, var_4079_cast_fp16, var_4081_cast_fp16, var_4083_cast_fp16, var_4085_cast_fp16, var_4087_cast_fp16, var_4089_cast_fp16, var_4091_cast_fp16, var_4093_cast_fp16, var_4095_cast_fp16, var_4097_cast_fp16, var_4099_cast_fp16, var_4101_cast_fp16, var_4103_cast_fp16, var_4105_cast_fp16))[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<string, []> var_4114_pad_type_0 = const()[name = tensor<string, []>("op_4114_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4114_strides_0 = const()[name = tensor<string, []>("op_4114_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4114_pad_0 = const()[name = tensor<string, []>("op_4114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4114_dilations_0 = const()[name = tensor<string, []>("op_4114_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4114_groups_0 = const()[name = tensor<string, []>("op_4114_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_14_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(575445952)))];
+            tensor<fp16, [1280]> blocks_14_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578722816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4114_cast_fp16 = conv(bias = blocks_14_attn_out_bias_to_fp16, dilations = var_4114_dilations_0, groups = var_4114_groups_0, pad = var_4114_pad_0, pad_type = var_4114_pad_type_0, strides = var_4114_strides_0, weight = blocks_14_attn_out_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("op_4114_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = var_4114_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> input_147_axes_0 = const()[name = tensor<string, []>("input_147_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_147_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578725440)))];
+            tensor<fp16, [1280]> input_147_beta_0_to_fp16 = const()[name = tensor<string, []>("input_147_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578728064)))];
+            tensor<fp16, []> var_4124_to_fp16 = const()[name = tensor<string, []>("op_4124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_147_cast_fp16 = layer_norm(axes = input_147_axes_0, beta = input_147_beta_0_to_fp16, epsilon = var_4124_to_fp16, gamma = input_147_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<string, []> input_149_pad_type_0 = const()[name = tensor<string, []>("input_149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_149_strides_0 = const()[name = tensor<string, []>("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_149_pad_0 = const()[name = tensor<string, []>("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_149_dilations_0 = const()[name = tensor<string, []>("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_149_groups_0 = const()[name = tensor<string, []>("input_149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_14_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(578730688)))];
+            tensor<fp16, [5120]> blocks_14_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591837952)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_149_cast_fp16 = conv(bias = blocks_14_mlp_0_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = blocks_14_mlp_0_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<string, []> input_151_mode_0 = const()[name = tensor<string, []>("input_151_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<string, []> var_4150_pad_type_0 = const()[name = tensor<string, []>("op_4150_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4150_strides_0 = const()[name = tensor<string, []>("op_4150_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4150_pad_0 = const()[name = tensor<string, []>("op_4150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4150_dilations_0 = const()[name = tensor<string, []>("op_4150_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4150_groups_0 = const()[name = tensor<string, []>("op_4150_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_14_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591848256)))];
+            tensor<fp16, [1280]> blocks_14_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604955520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4150_cast_fp16 = conv(bias = blocks_14_mlp_2_bias_to_fp16, dilations = var_4150_dilations_0, groups = var_4150_groups_0, pad = var_4150_pad_0, pad_type = var_4150_pad_type_0, strides = var_4150_strides_0, weight = blocks_14_mlp_2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("op_4150_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = var_4150_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_4159 = const()[name = tensor<string, []>("op_4159"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_153_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_153_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604958144)))];
+            tensor<fp16, [1280]> input_153_beta_0_to_fp16 = const()[name = tensor<string, []>("input_153_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604960768)))];
+            tensor<fp16, []> var_4175_to_fp16 = const()[name = tensor<string, []>("op_4175_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, beta = input_153_beta_0_to_fp16, epsilon = var_4175_to_fp16, gamma = input_153_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<string, []> q_31_pad_type_0 = const()[name = tensor<string, []>("q_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_31_strides_0 = const()[name = tensor<string, []>("q_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_31_pad_0 = const()[name = tensor<string, []>("q_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_31_dilations_0 = const()[name = tensor<string, []>("q_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_31_groups_0 = const()[name = tensor<string, []>("q_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4210_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4210_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(604963392)))];
+            tensor<fp16, [1280]> var_4210_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4210_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(608240256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4210_cast_fp16 = conv(bias = var_4210_bias_0_to_fp16, dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = var_4210_weight_0_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4210_cast_fp16")];
+            tensor<string, []> k_31_pad_type_0 = const()[name = tensor<string, []>("k_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_31_strides_0 = const()[name = tensor<string, []>("k_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_31_pad_0 = const()[name = tensor<string, []>("k_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_31_dilations_0 = const()[name = tensor<string, []>("k_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_31_groups_0 = const()[name = tensor<string, []>("k_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(608242880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_31_cast_fp16 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = blocks_15_attn_key_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("k_31_cast_fp16")];
+            tensor<string, []> var_4208_pad_type_0 = const()[name = tensor<string, []>("op_4208_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4208_strides_0 = const()[name = tensor<string, []>("op_4208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4208_pad_0 = const()[name = tensor<string, []>("op_4208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4208_dilations_0 = const()[name = tensor<string, []>("op_4208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4208_groups_0 = const()[name = tensor<string, []>("op_4208_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(611519744)))];
+            tensor<fp16, [1280]> blocks_15_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614796608)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4208_cast_fp16 = conv(bias = blocks_15_attn_value_bias_to_fp16, dilations = var_4208_dilations_0, groups = var_4208_groups_0, pad = var_4208_pad_0, pad_type = var_4208_pad_type_0, strides = var_4208_strides_0, weight = blocks_15_attn_value_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_4208_cast_fp16")];
+            tensor<int32, [20]> tile_45 = const()[name = tensor<string, []>("tile_45"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4211_axis_0 = const()[name = tensor<string, []>("op_4211_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4211_cast_fp16_19 = split(axis = var_4211_axis_0, split_sizes = tile_45, x = var_4210_cast_fp16)[name = tensor<string, []>("op_4211_cast_fp16")];
+            tensor<int32, [4]> var_4232_perm_0 = const()[name = tensor<string, []>("op_4232_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_46 = const()[name = tensor<string, []>("tile_46"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4233_axis_0 = const()[name = tensor<string, []>("op_4233_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4232_cast_fp16 = transpose(perm = var_4232_perm_0, x = k_31_cast_fp16)[name = tensor<string, []>("transpose_17")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4233_cast_fp16_19 = split(axis = var_4233_axis_0, split_sizes = tile_46, x = var_4232_cast_fp16)[name = tensor<string, []>("op_4233_cast_fp16")];
+            tensor<int32, [20]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4254_axis_0 = const()[name = tensor<string, []>("op_4254_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4254_cast_fp16_19 = split(axis = var_4254_axis_0, split_sizes = tile_47, x = var_4208_cast_fp16)[name = tensor<string, []>("op_4254_cast_fp16")];
+            tensor<string, []> aw_601_equation_0 = const()[name = tensor<string, []>("aw_601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_601_cast_fp16 = einsum(equation = aw_601_equation_0, values = (var_4233_cast_fp16_0, var_4211_cast_fp16_0))[name = tensor<string, []>("aw_601_cast_fp16")];
+            tensor<string, []> aw_603_equation_0 = const()[name = tensor<string, []>("aw_603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_603_cast_fp16 = einsum(equation = aw_603_equation_0, values = (var_4233_cast_fp16_1, var_4211_cast_fp16_1))[name = tensor<string, []>("aw_603_cast_fp16")];
+            tensor<string, []> aw_605_equation_0 = const()[name = tensor<string, []>("aw_605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_605_cast_fp16 = einsum(equation = aw_605_equation_0, values = (var_4233_cast_fp16_2, var_4211_cast_fp16_2))[name = tensor<string, []>("aw_605_cast_fp16")];
+            tensor<string, []> aw_607_equation_0 = const()[name = tensor<string, []>("aw_607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_607_cast_fp16 = einsum(equation = aw_607_equation_0, values = (var_4233_cast_fp16_3, var_4211_cast_fp16_3))[name = tensor<string, []>("aw_607_cast_fp16")];
+            tensor<string, []> aw_609_equation_0 = const()[name = tensor<string, []>("aw_609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_609_cast_fp16 = einsum(equation = aw_609_equation_0, values = (var_4233_cast_fp16_4, var_4211_cast_fp16_4))[name = tensor<string, []>("aw_609_cast_fp16")];
+            tensor<string, []> aw_611_equation_0 = const()[name = tensor<string, []>("aw_611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_611_cast_fp16 = einsum(equation = aw_611_equation_0, values = (var_4233_cast_fp16_5, var_4211_cast_fp16_5))[name = tensor<string, []>("aw_611_cast_fp16")];
+            tensor<string, []> aw_613_equation_0 = const()[name = tensor<string, []>("aw_613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_613_cast_fp16 = einsum(equation = aw_613_equation_0, values = (var_4233_cast_fp16_6, var_4211_cast_fp16_6))[name = tensor<string, []>("aw_613_cast_fp16")];
+            tensor<string, []> aw_615_equation_0 = const()[name = tensor<string, []>("aw_615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_615_cast_fp16 = einsum(equation = aw_615_equation_0, values = (var_4233_cast_fp16_7, var_4211_cast_fp16_7))[name = tensor<string, []>("aw_615_cast_fp16")];
+            tensor<string, []> aw_617_equation_0 = const()[name = tensor<string, []>("aw_617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_617_cast_fp16 = einsum(equation = aw_617_equation_0, values = (var_4233_cast_fp16_8, var_4211_cast_fp16_8))[name = tensor<string, []>("aw_617_cast_fp16")];
+            tensor<string, []> aw_619_equation_0 = const()[name = tensor<string, []>("aw_619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_619_cast_fp16 = einsum(equation = aw_619_equation_0, values = (var_4233_cast_fp16_9, var_4211_cast_fp16_9))[name = tensor<string, []>("aw_619_cast_fp16")];
+            tensor<string, []> aw_621_equation_0 = const()[name = tensor<string, []>("aw_621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_621_cast_fp16 = einsum(equation = aw_621_equation_0, values = (var_4233_cast_fp16_10, var_4211_cast_fp16_10))[name = tensor<string, []>("aw_621_cast_fp16")];
+            tensor<string, []> aw_623_equation_0 = const()[name = tensor<string, []>("aw_623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_623_cast_fp16 = einsum(equation = aw_623_equation_0, values = (var_4233_cast_fp16_11, var_4211_cast_fp16_11))[name = tensor<string, []>("aw_623_cast_fp16")];
+            tensor<string, []> aw_625_equation_0 = const()[name = tensor<string, []>("aw_625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_625_cast_fp16 = einsum(equation = aw_625_equation_0, values = (var_4233_cast_fp16_12, var_4211_cast_fp16_12))[name = tensor<string, []>("aw_625_cast_fp16")];
+            tensor<string, []> aw_627_equation_0 = const()[name = tensor<string, []>("aw_627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_627_cast_fp16 = einsum(equation = aw_627_equation_0, values = (var_4233_cast_fp16_13, var_4211_cast_fp16_13))[name = tensor<string, []>("aw_627_cast_fp16")];
+            tensor<string, []> aw_629_equation_0 = const()[name = tensor<string, []>("aw_629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_629_cast_fp16 = einsum(equation = aw_629_equation_0, values = (var_4233_cast_fp16_14, var_4211_cast_fp16_14))[name = tensor<string, []>("aw_629_cast_fp16")];
+            tensor<string, []> aw_631_equation_0 = const()[name = tensor<string, []>("aw_631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_631_cast_fp16 = einsum(equation = aw_631_equation_0, values = (var_4233_cast_fp16_15, var_4211_cast_fp16_15))[name = tensor<string, []>("aw_631_cast_fp16")];
+            tensor<string, []> aw_633_equation_0 = const()[name = tensor<string, []>("aw_633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_633_cast_fp16 = einsum(equation = aw_633_equation_0, values = (var_4233_cast_fp16_16, var_4211_cast_fp16_16))[name = tensor<string, []>("aw_633_cast_fp16")];
+            tensor<string, []> aw_635_equation_0 = const()[name = tensor<string, []>("aw_635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_635_cast_fp16 = einsum(equation = aw_635_equation_0, values = (var_4233_cast_fp16_17, var_4211_cast_fp16_17))[name = tensor<string, []>("aw_635_cast_fp16")];
+            tensor<string, []> aw_637_equation_0 = const()[name = tensor<string, []>("aw_637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_637_cast_fp16 = einsum(equation = aw_637_equation_0, values = (var_4233_cast_fp16_18, var_4211_cast_fp16_18))[name = tensor<string, []>("aw_637_cast_fp16")];
+            tensor<string, []> aw_639_equation_0 = const()[name = tensor<string, []>("aw_639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_639_cast_fp16 = einsum(equation = aw_639_equation_0, values = (var_4233_cast_fp16_19, var_4211_cast_fp16_19))[name = tensor<string, []>("aw_639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4315_cast_fp16 = softmax(axis = var_4159, x = aw_601_cast_fp16)[name = tensor<string, []>("op_4315_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4316_cast_fp16 = softmax(axis = var_4159, x = aw_603_cast_fp16)[name = tensor<string, []>("op_4316_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4317_cast_fp16 = softmax(axis = var_4159, x = aw_605_cast_fp16)[name = tensor<string, []>("op_4317_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4318_cast_fp16 = softmax(axis = var_4159, x = aw_607_cast_fp16)[name = tensor<string, []>("op_4318_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4319_cast_fp16 = softmax(axis = var_4159, x = aw_609_cast_fp16)[name = tensor<string, []>("op_4319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4320_cast_fp16 = softmax(axis = var_4159, x = aw_611_cast_fp16)[name = tensor<string, []>("op_4320_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4321_cast_fp16 = softmax(axis = var_4159, x = aw_613_cast_fp16)[name = tensor<string, []>("op_4321_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4322_cast_fp16 = softmax(axis = var_4159, x = aw_615_cast_fp16)[name = tensor<string, []>("op_4322_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4323_cast_fp16 = softmax(axis = var_4159, x = aw_617_cast_fp16)[name = tensor<string, []>("op_4323_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4324_cast_fp16 = softmax(axis = var_4159, x = aw_619_cast_fp16)[name = tensor<string, []>("op_4324_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4325_cast_fp16 = softmax(axis = var_4159, x = aw_621_cast_fp16)[name = tensor<string, []>("op_4325_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4326_cast_fp16 = softmax(axis = var_4159, x = aw_623_cast_fp16)[name = tensor<string, []>("op_4326_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4327_cast_fp16 = softmax(axis = var_4159, x = aw_625_cast_fp16)[name = tensor<string, []>("op_4327_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4328_cast_fp16 = softmax(axis = var_4159, x = aw_627_cast_fp16)[name = tensor<string, []>("op_4328_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4329_cast_fp16 = softmax(axis = var_4159, x = aw_629_cast_fp16)[name = tensor<string, []>("op_4329_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4330_cast_fp16 = softmax(axis = var_4159, x = aw_631_cast_fp16)[name = tensor<string, []>("op_4330_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4331_cast_fp16 = softmax(axis = var_4159, x = aw_633_cast_fp16)[name = tensor<string, []>("op_4331_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4332_cast_fp16 = softmax(axis = var_4159, x = aw_635_cast_fp16)[name = tensor<string, []>("op_4332_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4333_cast_fp16 = softmax(axis = var_4159, x = aw_637_cast_fp16)[name = tensor<string, []>("op_4333_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4334_cast_fp16 = softmax(axis = var_4159, x = aw_639_cast_fp16)[name = tensor<string, []>("op_4334_cast_fp16")];
+            tensor<string, []> var_4336_equation_0 = const()[name = tensor<string, []>("op_4336_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4336_cast_fp16 = einsum(equation = var_4336_equation_0, values = (var_4254_cast_fp16_0, var_4315_cast_fp16))[name = tensor<string, []>("op_4336_cast_fp16")];
+            tensor<string, []> var_4338_equation_0 = const()[name = tensor<string, []>("op_4338_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4338_cast_fp16 = einsum(equation = var_4338_equation_0, values = (var_4254_cast_fp16_1, var_4316_cast_fp16))[name = tensor<string, []>("op_4338_cast_fp16")];
+            tensor<string, []> var_4340_equation_0 = const()[name = tensor<string, []>("op_4340_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4340_cast_fp16 = einsum(equation = var_4340_equation_0, values = (var_4254_cast_fp16_2, var_4317_cast_fp16))[name = tensor<string, []>("op_4340_cast_fp16")];
+            tensor<string, []> var_4342_equation_0 = const()[name = tensor<string, []>("op_4342_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4342_cast_fp16 = einsum(equation = var_4342_equation_0, values = (var_4254_cast_fp16_3, var_4318_cast_fp16))[name = tensor<string, []>("op_4342_cast_fp16")];
+            tensor<string, []> var_4344_equation_0 = const()[name = tensor<string, []>("op_4344_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4344_cast_fp16 = einsum(equation = var_4344_equation_0, values = (var_4254_cast_fp16_4, var_4319_cast_fp16))[name = tensor<string, []>("op_4344_cast_fp16")];
+            tensor<string, []> var_4346_equation_0 = const()[name = tensor<string, []>("op_4346_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4346_cast_fp16 = einsum(equation = var_4346_equation_0, values = (var_4254_cast_fp16_5, var_4320_cast_fp16))[name = tensor<string, []>("op_4346_cast_fp16")];
+            tensor<string, []> var_4348_equation_0 = const()[name = tensor<string, []>("op_4348_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4348_cast_fp16 = einsum(equation = var_4348_equation_0, values = (var_4254_cast_fp16_6, var_4321_cast_fp16))[name = tensor<string, []>("op_4348_cast_fp16")];
+            tensor<string, []> var_4350_equation_0 = const()[name = tensor<string, []>("op_4350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4350_cast_fp16 = einsum(equation = var_4350_equation_0, values = (var_4254_cast_fp16_7, var_4322_cast_fp16))[name = tensor<string, []>("op_4350_cast_fp16")];
+            tensor<string, []> var_4352_equation_0 = const()[name = tensor<string, []>("op_4352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4352_cast_fp16 = einsum(equation = var_4352_equation_0, values = (var_4254_cast_fp16_8, var_4323_cast_fp16))[name = tensor<string, []>("op_4352_cast_fp16")];
+            tensor<string, []> var_4354_equation_0 = const()[name = tensor<string, []>("op_4354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4354_cast_fp16 = einsum(equation = var_4354_equation_0, values = (var_4254_cast_fp16_9, var_4324_cast_fp16))[name = tensor<string, []>("op_4354_cast_fp16")];
+            tensor<string, []> var_4356_equation_0 = const()[name = tensor<string, []>("op_4356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4356_cast_fp16 = einsum(equation = var_4356_equation_0, values = (var_4254_cast_fp16_10, var_4325_cast_fp16))[name = tensor<string, []>("op_4356_cast_fp16")];
+            tensor<string, []> var_4358_equation_0 = const()[name = tensor<string, []>("op_4358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4358_cast_fp16 = einsum(equation = var_4358_equation_0, values = (var_4254_cast_fp16_11, var_4326_cast_fp16))[name = tensor<string, []>("op_4358_cast_fp16")];
+            tensor<string, []> var_4360_equation_0 = const()[name = tensor<string, []>("op_4360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4360_cast_fp16 = einsum(equation = var_4360_equation_0, values = (var_4254_cast_fp16_12, var_4327_cast_fp16))[name = tensor<string, []>("op_4360_cast_fp16")];
+            tensor<string, []> var_4362_equation_0 = const()[name = tensor<string, []>("op_4362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4362_cast_fp16 = einsum(equation = var_4362_equation_0, values = (var_4254_cast_fp16_13, var_4328_cast_fp16))[name = tensor<string, []>("op_4362_cast_fp16")];
+            tensor<string, []> var_4364_equation_0 = const()[name = tensor<string, []>("op_4364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4364_cast_fp16 = einsum(equation = var_4364_equation_0, values = (var_4254_cast_fp16_14, var_4329_cast_fp16))[name = tensor<string, []>("op_4364_cast_fp16")];
+            tensor<string, []> var_4366_equation_0 = const()[name = tensor<string, []>("op_4366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4366_cast_fp16 = einsum(equation = var_4366_equation_0, values = (var_4254_cast_fp16_15, var_4330_cast_fp16))[name = tensor<string, []>("op_4366_cast_fp16")];
+            tensor<string, []> var_4368_equation_0 = const()[name = tensor<string, []>("op_4368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4368_cast_fp16 = einsum(equation = var_4368_equation_0, values = (var_4254_cast_fp16_16, var_4331_cast_fp16))[name = tensor<string, []>("op_4368_cast_fp16")];
+            tensor<string, []> var_4370_equation_0 = const()[name = tensor<string, []>("op_4370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4370_cast_fp16 = einsum(equation = var_4370_equation_0, values = (var_4254_cast_fp16_17, var_4332_cast_fp16))[name = tensor<string, []>("op_4370_cast_fp16")];
+            tensor<string, []> var_4372_equation_0 = const()[name = tensor<string, []>("op_4372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4372_cast_fp16 = einsum(equation = var_4372_equation_0, values = (var_4254_cast_fp16_18, var_4333_cast_fp16))[name = tensor<string, []>("op_4372_cast_fp16")];
+            tensor<string, []> var_4374_equation_0 = const()[name = tensor<string, []>("op_4374_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4374_cast_fp16 = einsum(equation = var_4374_equation_0, values = (var_4254_cast_fp16_19, var_4334_cast_fp16))[name = tensor<string, []>("op_4374_cast_fp16")];
+            tensor<bool, []> input_155_interleave_0 = const()[name = tensor<string, []>("input_155_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_155_cast_fp16 = concat(axis = var_4159, interleave = input_155_interleave_0, values = (var_4336_cast_fp16, var_4338_cast_fp16, var_4340_cast_fp16, var_4342_cast_fp16, var_4344_cast_fp16, var_4346_cast_fp16, var_4348_cast_fp16, var_4350_cast_fp16, var_4352_cast_fp16, var_4354_cast_fp16, var_4356_cast_fp16, var_4358_cast_fp16, var_4360_cast_fp16, var_4362_cast_fp16, var_4364_cast_fp16, var_4366_cast_fp16, var_4368_cast_fp16, var_4370_cast_fp16, var_4372_cast_fp16, var_4374_cast_fp16))[name = tensor<string, []>("input_155_cast_fp16")];
+            tensor<string, []> var_4383_pad_type_0 = const()[name = tensor<string, []>("op_4383_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4383_strides_0 = const()[name = tensor<string, []>("op_4383_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4383_pad_0 = const()[name = tensor<string, []>("op_4383_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4383_dilations_0 = const()[name = tensor<string, []>("op_4383_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4383_groups_0 = const()[name = tensor<string, []>("op_4383_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_15_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614799232)))];
+            tensor<fp16, [1280]> blocks_15_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(618076096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4383_cast_fp16 = conv(bias = blocks_15_attn_out_bias_to_fp16, dilations = var_4383_dilations_0, groups = var_4383_groups_0, pad = var_4383_pad_0, pad_type = var_4383_pad_type_0, strides = var_4383_strides_0, weight = blocks_15_attn_out_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("op_4383_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = var_4383_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> input_157_axes_0 = const()[name = tensor<string, []>("input_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_157_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_157_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(618078720)))];
+            tensor<fp16, [1280]> input_157_beta_0_to_fp16 = const()[name = tensor<string, []>("input_157_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(618081344)))];
+            tensor<fp16, []> var_4393_to_fp16 = const()[name = tensor<string, []>("op_4393_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_157_cast_fp16 = layer_norm(axes = input_157_axes_0, beta = input_157_beta_0_to_fp16, epsilon = var_4393_to_fp16, gamma = input_157_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
+            tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_15_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(618083968)))];
+            tensor<fp16, [5120]> blocks_15_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(631191232)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_159_cast_fp16 = conv(bias = blocks_15_mlp_0_bias_to_fp16, dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = blocks_15_mlp_0_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
+            tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
+            tensor<string, []> var_4419_pad_type_0 = const()[name = tensor<string, []>("op_4419_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4419_strides_0 = const()[name = tensor<string, []>("op_4419_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4419_pad_0 = const()[name = tensor<string, []>("op_4419_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4419_dilations_0 = const()[name = tensor<string, []>("op_4419_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4419_groups_0 = const()[name = tensor<string, []>("op_4419_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_15_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(631201536)))];
+            tensor<fp16, [1280]> blocks_15_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(644308800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4419_cast_fp16 = conv(bias = blocks_15_mlp_2_bias_to_fp16, dilations = var_4419_dilations_0, groups = var_4419_groups_0, pad = var_4419_pad_0, pad_type = var_4419_pad_type_0, strides = var_4419_strides_0, weight = blocks_15_mlp_2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("op_4419_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_4419_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, []> var_4428 = const()[name = tensor<string, []>("op_4428"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(644311424)))];
+            tensor<fp16, [1280]> input_163_beta_0_to_fp16 = const()[name = tensor<string, []>("input_163_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(644314048)))];
+            tensor<fp16, []> var_4444_to_fp16 = const()[name = tensor<string, []>("op_4444_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_4444_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<string, []> q_33_pad_type_0 = const()[name = tensor<string, []>("q_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_33_strides_0 = const()[name = tensor<string, []>("q_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_33_pad_0 = const()[name = tensor<string, []>("q_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_33_dilations_0 = const()[name = tensor<string, []>("q_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_33_groups_0 = const()[name = tensor<string, []>("q_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4479_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4479_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(644316672)))];
+            tensor<fp16, [1280]> var_4479_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4479_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647593536)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4479_cast_fp16 = conv(bias = var_4479_bias_0_to_fp16, dilations = q_33_dilations_0, groups = q_33_groups_0, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = q_33_strides_0, weight = var_4479_weight_0_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4479_cast_fp16")];
+            tensor<string, []> k_33_pad_type_0 = const()[name = tensor<string, []>("k_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_33_strides_0 = const()[name = tensor<string, []>("k_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_33_pad_0 = const()[name = tensor<string, []>("k_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_33_dilations_0 = const()[name = tensor<string, []>("k_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_33_groups_0 = const()[name = tensor<string, []>("k_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(647596160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_33_cast_fp16 = conv(dilations = k_33_dilations_0, groups = k_33_groups_0, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = k_33_strides_0, weight = blocks_16_attn_key_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
+            tensor<string, []> var_4477_pad_type_0 = const()[name = tensor<string, []>("op_4477_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4477_strides_0 = const()[name = tensor<string, []>("op_4477_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4477_pad_0 = const()[name = tensor<string, []>("op_4477_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4477_dilations_0 = const()[name = tensor<string, []>("op_4477_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4477_groups_0 = const()[name = tensor<string, []>("op_4477_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(650873024)))];
+            tensor<fp16, [1280]> blocks_16_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(654149888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4477_cast_fp16 = conv(bias = blocks_16_attn_value_bias_to_fp16, dilations = var_4477_dilations_0, groups = var_4477_groups_0, pad = var_4477_pad_0, pad_type = var_4477_pad_type_0, strides = var_4477_strides_0, weight = blocks_16_attn_value_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_4477_cast_fp16")];
+            tensor<int32, [20]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4480_axis_0 = const()[name = tensor<string, []>("op_4480_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4480_cast_fp16_19 = split(axis = var_4480_axis_0, split_sizes = tile_48, x = var_4479_cast_fp16)[name = tensor<string, []>("op_4480_cast_fp16")];
+            tensor<int32, [4]> var_4501_perm_0 = const()[name = tensor<string, []>("op_4501_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4502_axis_0 = const()[name = tensor<string, []>("op_4502_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4501_cast_fp16 = transpose(perm = var_4501_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_16")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4502_cast_fp16_19 = split(axis = var_4502_axis_0, split_sizes = tile_49, x = var_4501_cast_fp16)[name = tensor<string, []>("op_4502_cast_fp16")];
+            tensor<int32, [20]> tile_50 = const()[name = tensor<string, []>("tile_50"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4523_axis_0 = const()[name = tensor<string, []>("op_4523_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4523_cast_fp16_19 = split(axis = var_4523_axis_0, split_sizes = tile_50, x = var_4477_cast_fp16)[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<string, []> aw_641_equation_0 = const()[name = tensor<string, []>("aw_641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_641_cast_fp16 = einsum(equation = aw_641_equation_0, values = (var_4502_cast_fp16_0, var_4480_cast_fp16_0))[name = tensor<string, []>("aw_641_cast_fp16")];
+            tensor<string, []> aw_643_equation_0 = const()[name = tensor<string, []>("aw_643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_643_cast_fp16 = einsum(equation = aw_643_equation_0, values = (var_4502_cast_fp16_1, var_4480_cast_fp16_1))[name = tensor<string, []>("aw_643_cast_fp16")];
+            tensor<string, []> aw_645_equation_0 = const()[name = tensor<string, []>("aw_645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_645_cast_fp16 = einsum(equation = aw_645_equation_0, values = (var_4502_cast_fp16_2, var_4480_cast_fp16_2))[name = tensor<string, []>("aw_645_cast_fp16")];
+            tensor<string, []> aw_647_equation_0 = const()[name = tensor<string, []>("aw_647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_647_cast_fp16 = einsum(equation = aw_647_equation_0, values = (var_4502_cast_fp16_3, var_4480_cast_fp16_3))[name = tensor<string, []>("aw_647_cast_fp16")];
+            tensor<string, []> aw_649_equation_0 = const()[name = tensor<string, []>("aw_649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_649_cast_fp16 = einsum(equation = aw_649_equation_0, values = (var_4502_cast_fp16_4, var_4480_cast_fp16_4))[name = tensor<string, []>("aw_649_cast_fp16")];
+            tensor<string, []> aw_651_equation_0 = const()[name = tensor<string, []>("aw_651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_651_cast_fp16 = einsum(equation = aw_651_equation_0, values = (var_4502_cast_fp16_5, var_4480_cast_fp16_5))[name = tensor<string, []>("aw_651_cast_fp16")];
+            tensor<string, []> aw_653_equation_0 = const()[name = tensor<string, []>("aw_653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_653_cast_fp16 = einsum(equation = aw_653_equation_0, values = (var_4502_cast_fp16_6, var_4480_cast_fp16_6))[name = tensor<string, []>("aw_653_cast_fp16")];
+            tensor<string, []> aw_655_equation_0 = const()[name = tensor<string, []>("aw_655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_655_cast_fp16 = einsum(equation = aw_655_equation_0, values = (var_4502_cast_fp16_7, var_4480_cast_fp16_7))[name = tensor<string, []>("aw_655_cast_fp16")];
+            tensor<string, []> aw_657_equation_0 = const()[name = tensor<string, []>("aw_657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_657_cast_fp16 = einsum(equation = aw_657_equation_0, values = (var_4502_cast_fp16_8, var_4480_cast_fp16_8))[name = tensor<string, []>("aw_657_cast_fp16")];
+            tensor<string, []> aw_659_equation_0 = const()[name = tensor<string, []>("aw_659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_659_cast_fp16 = einsum(equation = aw_659_equation_0, values = (var_4502_cast_fp16_9, var_4480_cast_fp16_9))[name = tensor<string, []>("aw_659_cast_fp16")];
+            tensor<string, []> aw_661_equation_0 = const()[name = tensor<string, []>("aw_661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_661_cast_fp16 = einsum(equation = aw_661_equation_0, values = (var_4502_cast_fp16_10, var_4480_cast_fp16_10))[name = tensor<string, []>("aw_661_cast_fp16")];
+            tensor<string, []> aw_663_equation_0 = const()[name = tensor<string, []>("aw_663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_663_cast_fp16 = einsum(equation = aw_663_equation_0, values = (var_4502_cast_fp16_11, var_4480_cast_fp16_11))[name = tensor<string, []>("aw_663_cast_fp16")];
+            tensor<string, []> aw_665_equation_0 = const()[name = tensor<string, []>("aw_665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_665_cast_fp16 = einsum(equation = aw_665_equation_0, values = (var_4502_cast_fp16_12, var_4480_cast_fp16_12))[name = tensor<string, []>("aw_665_cast_fp16")];
+            tensor<string, []> aw_667_equation_0 = const()[name = tensor<string, []>("aw_667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_667_cast_fp16 = einsum(equation = aw_667_equation_0, values = (var_4502_cast_fp16_13, var_4480_cast_fp16_13))[name = tensor<string, []>("aw_667_cast_fp16")];
+            tensor<string, []> aw_669_equation_0 = const()[name = tensor<string, []>("aw_669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_669_cast_fp16 = einsum(equation = aw_669_equation_0, values = (var_4502_cast_fp16_14, var_4480_cast_fp16_14))[name = tensor<string, []>("aw_669_cast_fp16")];
+            tensor<string, []> aw_671_equation_0 = const()[name = tensor<string, []>("aw_671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_671_cast_fp16 = einsum(equation = aw_671_equation_0, values = (var_4502_cast_fp16_15, var_4480_cast_fp16_15))[name = tensor<string, []>("aw_671_cast_fp16")];
+            tensor<string, []> aw_673_equation_0 = const()[name = tensor<string, []>("aw_673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_673_cast_fp16 = einsum(equation = aw_673_equation_0, values = (var_4502_cast_fp16_16, var_4480_cast_fp16_16))[name = tensor<string, []>("aw_673_cast_fp16")];
+            tensor<string, []> aw_675_equation_0 = const()[name = tensor<string, []>("aw_675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_675_cast_fp16 = einsum(equation = aw_675_equation_0, values = (var_4502_cast_fp16_17, var_4480_cast_fp16_17))[name = tensor<string, []>("aw_675_cast_fp16")];
+            tensor<string, []> aw_677_equation_0 = const()[name = tensor<string, []>("aw_677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_677_cast_fp16 = einsum(equation = aw_677_equation_0, values = (var_4502_cast_fp16_18, var_4480_cast_fp16_18))[name = tensor<string, []>("aw_677_cast_fp16")];
+            tensor<string, []> aw_679_equation_0 = const()[name = tensor<string, []>("aw_679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_679_cast_fp16 = einsum(equation = aw_679_equation_0, values = (var_4502_cast_fp16_19, var_4480_cast_fp16_19))[name = tensor<string, []>("aw_679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4584_cast_fp16 = softmax(axis = var_4428, x = aw_641_cast_fp16)[name = tensor<string, []>("op_4584_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4585_cast_fp16 = softmax(axis = var_4428, x = aw_643_cast_fp16)[name = tensor<string, []>("op_4585_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4586_cast_fp16 = softmax(axis = var_4428, x = aw_645_cast_fp16)[name = tensor<string, []>("op_4586_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4587_cast_fp16 = softmax(axis = var_4428, x = aw_647_cast_fp16)[name = tensor<string, []>("op_4587_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4588_cast_fp16 = softmax(axis = var_4428, x = aw_649_cast_fp16)[name = tensor<string, []>("op_4588_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4589_cast_fp16 = softmax(axis = var_4428, x = aw_651_cast_fp16)[name = tensor<string, []>("op_4589_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4590_cast_fp16 = softmax(axis = var_4428, x = aw_653_cast_fp16)[name = tensor<string, []>("op_4590_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4591_cast_fp16 = softmax(axis = var_4428, x = aw_655_cast_fp16)[name = tensor<string, []>("op_4591_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4592_cast_fp16 = softmax(axis = var_4428, x = aw_657_cast_fp16)[name = tensor<string, []>("op_4592_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4593_cast_fp16 = softmax(axis = var_4428, x = aw_659_cast_fp16)[name = tensor<string, []>("op_4593_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4594_cast_fp16 = softmax(axis = var_4428, x = aw_661_cast_fp16)[name = tensor<string, []>("op_4594_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4595_cast_fp16 = softmax(axis = var_4428, x = aw_663_cast_fp16)[name = tensor<string, []>("op_4595_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4596_cast_fp16 = softmax(axis = var_4428, x = aw_665_cast_fp16)[name = tensor<string, []>("op_4596_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4597_cast_fp16 = softmax(axis = var_4428, x = aw_667_cast_fp16)[name = tensor<string, []>("op_4597_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4598_cast_fp16 = softmax(axis = var_4428, x = aw_669_cast_fp16)[name = tensor<string, []>("op_4598_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4599_cast_fp16 = softmax(axis = var_4428, x = aw_671_cast_fp16)[name = tensor<string, []>("op_4599_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4600_cast_fp16 = softmax(axis = var_4428, x = aw_673_cast_fp16)[name = tensor<string, []>("op_4600_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4601_cast_fp16 = softmax(axis = var_4428, x = aw_675_cast_fp16)[name = tensor<string, []>("op_4601_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4602_cast_fp16 = softmax(axis = var_4428, x = aw_677_cast_fp16)[name = tensor<string, []>("op_4602_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4603_cast_fp16 = softmax(axis = var_4428, x = aw_679_cast_fp16)[name = tensor<string, []>("op_4603_cast_fp16")];
+            tensor<string, []> var_4605_equation_0 = const()[name = tensor<string, []>("op_4605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4605_cast_fp16 = einsum(equation = var_4605_equation_0, values = (var_4523_cast_fp16_0, var_4584_cast_fp16))[name = tensor<string, []>("op_4605_cast_fp16")];
+            tensor<string, []> var_4607_equation_0 = const()[name = tensor<string, []>("op_4607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4523_cast_fp16_1, var_4585_cast_fp16))[name = tensor<string, []>("op_4607_cast_fp16")];
+            tensor<string, []> var_4609_equation_0 = const()[name = tensor<string, []>("op_4609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4609_cast_fp16 = einsum(equation = var_4609_equation_0, values = (var_4523_cast_fp16_2, var_4586_cast_fp16))[name = tensor<string, []>("op_4609_cast_fp16")];
+            tensor<string, []> var_4611_equation_0 = const()[name = tensor<string, []>("op_4611_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4523_cast_fp16_3, var_4587_cast_fp16))[name = tensor<string, []>("op_4611_cast_fp16")];
+            tensor<string, []> var_4613_equation_0 = const()[name = tensor<string, []>("op_4613_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4613_cast_fp16 = einsum(equation = var_4613_equation_0, values = (var_4523_cast_fp16_4, var_4588_cast_fp16))[name = tensor<string, []>("op_4613_cast_fp16")];
+            tensor<string, []> var_4615_equation_0 = const()[name = tensor<string, []>("op_4615_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4523_cast_fp16_5, var_4589_cast_fp16))[name = tensor<string, []>("op_4615_cast_fp16")];
+            tensor<string, []> var_4617_equation_0 = const()[name = tensor<string, []>("op_4617_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4617_cast_fp16 = einsum(equation = var_4617_equation_0, values = (var_4523_cast_fp16_6, var_4590_cast_fp16))[name = tensor<string, []>("op_4617_cast_fp16")];
+            tensor<string, []> var_4619_equation_0 = const()[name = tensor<string, []>("op_4619_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4523_cast_fp16_7, var_4591_cast_fp16))[name = tensor<string, []>("op_4619_cast_fp16")];
+            tensor<string, []> var_4621_equation_0 = const()[name = tensor<string, []>("op_4621_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4621_cast_fp16 = einsum(equation = var_4621_equation_0, values = (var_4523_cast_fp16_8, var_4592_cast_fp16))[name = tensor<string, []>("op_4621_cast_fp16")];
+            tensor<string, []> var_4623_equation_0 = const()[name = tensor<string, []>("op_4623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4523_cast_fp16_9, var_4593_cast_fp16))[name = tensor<string, []>("op_4623_cast_fp16")];
+            tensor<string, []> var_4625_equation_0 = const()[name = tensor<string, []>("op_4625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4625_cast_fp16 = einsum(equation = var_4625_equation_0, values = (var_4523_cast_fp16_10, var_4594_cast_fp16))[name = tensor<string, []>("op_4625_cast_fp16")];
+            tensor<string, []> var_4627_equation_0 = const()[name = tensor<string, []>("op_4627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4523_cast_fp16_11, var_4595_cast_fp16))[name = tensor<string, []>("op_4627_cast_fp16")];
+            tensor<string, []> var_4629_equation_0 = const()[name = tensor<string, []>("op_4629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4629_cast_fp16 = einsum(equation = var_4629_equation_0, values = (var_4523_cast_fp16_12, var_4596_cast_fp16))[name = tensor<string, []>("op_4629_cast_fp16")];
+            tensor<string, []> var_4631_equation_0 = const()[name = tensor<string, []>("op_4631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4523_cast_fp16_13, var_4597_cast_fp16))[name = tensor<string, []>("op_4631_cast_fp16")];
+            tensor<string, []> var_4633_equation_0 = const()[name = tensor<string, []>("op_4633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4633_cast_fp16 = einsum(equation = var_4633_equation_0, values = (var_4523_cast_fp16_14, var_4598_cast_fp16))[name = tensor<string, []>("op_4633_cast_fp16")];
+            tensor<string, []> var_4635_equation_0 = const()[name = tensor<string, []>("op_4635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4523_cast_fp16_15, var_4599_cast_fp16))[name = tensor<string, []>("op_4635_cast_fp16")];
+            tensor<string, []> var_4637_equation_0 = const()[name = tensor<string, []>("op_4637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4637_cast_fp16 = einsum(equation = var_4637_equation_0, values = (var_4523_cast_fp16_16, var_4600_cast_fp16))[name = tensor<string, []>("op_4637_cast_fp16")];
+            tensor<string, []> var_4639_equation_0 = const()[name = tensor<string, []>("op_4639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4523_cast_fp16_17, var_4601_cast_fp16))[name = tensor<string, []>("op_4639_cast_fp16")];
+            tensor<string, []> var_4641_equation_0 = const()[name = tensor<string, []>("op_4641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4641_cast_fp16 = einsum(equation = var_4641_equation_0, values = (var_4523_cast_fp16_18, var_4602_cast_fp16))[name = tensor<string, []>("op_4641_cast_fp16")];
+            tensor<string, []> var_4643_equation_0 = const()[name = tensor<string, []>("op_4643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4523_cast_fp16_19, var_4603_cast_fp16))[name = tensor<string, []>("op_4643_cast_fp16")];
+            tensor<bool, []> input_165_interleave_0 = const()[name = tensor<string, []>("input_165_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_165_cast_fp16 = concat(axis = var_4428, interleave = input_165_interleave_0, values = (var_4605_cast_fp16, var_4607_cast_fp16, var_4609_cast_fp16, var_4611_cast_fp16, var_4613_cast_fp16, var_4615_cast_fp16, var_4617_cast_fp16, var_4619_cast_fp16, var_4621_cast_fp16, var_4623_cast_fp16, var_4625_cast_fp16, var_4627_cast_fp16, var_4629_cast_fp16, var_4631_cast_fp16, var_4633_cast_fp16, var_4635_cast_fp16, var_4637_cast_fp16, var_4639_cast_fp16, var_4641_cast_fp16, var_4643_cast_fp16))[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<string, []> var_4652_pad_type_0 = const()[name = tensor<string, []>("op_4652_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4652_strides_0 = const()[name = tensor<string, []>("op_4652_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4652_pad_0 = const()[name = tensor<string, []>("op_4652_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4652_dilations_0 = const()[name = tensor<string, []>("op_4652_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4652_groups_0 = const()[name = tensor<string, []>("op_4652_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_16_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(654152512)))];
+            tensor<fp16, [1280]> blocks_16_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657429376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4652_cast_fp16 = conv(bias = blocks_16_attn_out_bias_to_fp16, dilations = var_4652_dilations_0, groups = var_4652_groups_0, pad = var_4652_pad_0, pad_type = var_4652_pad_type_0, strides = var_4652_strides_0, weight = blocks_16_attn_out_weight_to_fp16, x = input_165_cast_fp16)[name = tensor<string, []>("op_4652_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = var_4652_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, [1]> input_167_axes_0 = const()[name = tensor<string, []>("input_167_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_167_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_167_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657432000)))];
+            tensor<fp16, [1280]> input_167_beta_0_to_fp16 = const()[name = tensor<string, []>("input_167_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657434624)))];
+            tensor<fp16, []> var_4662_to_fp16 = const()[name = tensor<string, []>("op_4662_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_167_cast_fp16 = layer_norm(axes = input_167_axes_0, beta = input_167_beta_0_to_fp16, epsilon = var_4662_to_fp16, gamma = input_167_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<string, []> input_169_pad_type_0 = const()[name = tensor<string, []>("input_169_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_169_strides_0 = const()[name = tensor<string, []>("input_169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_169_pad_0 = const()[name = tensor<string, []>("input_169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_169_dilations_0 = const()[name = tensor<string, []>("input_169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_169_groups_0 = const()[name = tensor<string, []>("input_169_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_16_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(657437248)))];
+            tensor<fp16, [5120]> blocks_16_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670544512)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_169_cast_fp16 = conv(bias = blocks_16_mlp_0_bias_to_fp16, dilations = input_169_dilations_0, groups = input_169_groups_0, pad = input_169_pad_0, pad_type = input_169_pad_type_0, strides = input_169_strides_0, weight = blocks_16_mlp_0_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<string, []> input_171_mode_0 = const()[name = tensor<string, []>("input_171_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_171_cast_fp16 = gelu(mode = input_171_mode_0, x = input_169_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<string, []> var_4688_pad_type_0 = const()[name = tensor<string, []>("op_4688_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4688_strides_0 = const()[name = tensor<string, []>("op_4688_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4688_pad_0 = const()[name = tensor<string, []>("op_4688_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4688_dilations_0 = const()[name = tensor<string, []>("op_4688_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4688_groups_0 = const()[name = tensor<string, []>("op_4688_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_16_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(670554816)))];
+            tensor<fp16, [1280]> blocks_16_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683662080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4688_cast_fp16 = conv(bias = blocks_16_mlp_2_bias_to_fp16, dilations = var_4688_dilations_0, groups = var_4688_groups_0, pad = var_4688_pad_0, pad_type = var_4688_pad_type_0, strides = var_4688_strides_0, weight = blocks_16_mlp_2_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("op_4688_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = var_4688_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, []> var_4697 = const()[name = tensor<string, []>("op_4697"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_173_axes_0 = const()[name = tensor<string, []>("input_173_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_173_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_173_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683664704)))];
+            tensor<fp16, [1280]> input_173_beta_0_to_fp16 = const()[name = tensor<string, []>("input_173_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683667328)))];
+            tensor<fp16, []> var_4713_to_fp16 = const()[name = tensor<string, []>("op_4713_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_173_cast_fp16 = layer_norm(axes = input_173_axes_0, beta = input_173_beta_0_to_fp16, epsilon = var_4713_to_fp16, gamma = input_173_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<string, []> q_35_pad_type_0 = const()[name = tensor<string, []>("q_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_35_strides_0 = const()[name = tensor<string, []>("q_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_35_pad_0 = const()[name = tensor<string, []>("q_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_35_dilations_0 = const()[name = tensor<string, []>("q_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_35_groups_0 = const()[name = tensor<string, []>("q_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_4748_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4748_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(683669952)))];
+            tensor<fp16, [1280]> var_4748_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4748_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686946816)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4748_cast_fp16 = conv(bias = var_4748_bias_0_to_fp16, dilations = q_35_dilations_0, groups = q_35_groups_0, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = q_35_strides_0, weight = var_4748_weight_0_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<string, []> k_35_pad_type_0 = const()[name = tensor<string, []>("k_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_35_strides_0 = const()[name = tensor<string, []>("k_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_35_pad_0 = const()[name = tensor<string, []>("k_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_35_dilations_0 = const()[name = tensor<string, []>("k_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_35_groups_0 = const()[name = tensor<string, []>("k_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(686949440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_35_cast_fp16 = conv(dilations = k_35_dilations_0, groups = k_35_groups_0, pad = k_35_pad_0, pad_type = k_35_pad_type_0, strides = k_35_strides_0, weight = blocks_17_attn_key_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("k_35_cast_fp16")];
+            tensor<string, []> var_4746_pad_type_0 = const()[name = tensor<string, []>("op_4746_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4746_strides_0 = const()[name = tensor<string, []>("op_4746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4746_pad_0 = const()[name = tensor<string, []>("op_4746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4746_dilations_0 = const()[name = tensor<string, []>("op_4746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4746_groups_0 = const()[name = tensor<string, []>("op_4746_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(690226304)))];
+            tensor<fp16, [1280]> blocks_17_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693503168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4746_cast_fp16 = conv(bias = blocks_17_attn_value_bias_to_fp16, dilations = var_4746_dilations_0, groups = var_4746_groups_0, pad = var_4746_pad_0, pad_type = var_4746_pad_type_0, strides = var_4746_strides_0, weight = blocks_17_attn_value_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4746_cast_fp16")];
+            tensor<int32, [20]> tile_51 = const()[name = tensor<string, []>("tile_51"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4749_axis_0 = const()[name = tensor<string, []>("op_4749_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4749_cast_fp16_19 = split(axis = var_4749_axis_0, split_sizes = tile_51, x = var_4748_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<int32, [4]> var_4770_perm_0 = const()[name = tensor<string, []>("op_4770_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4771_axis_0 = const()[name = tensor<string, []>("op_4771_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_4770_cast_fp16 = transpose(perm = var_4770_perm_0, x = k_35_cast_fp16)[name = tensor<string, []>("transpose_15")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_4771_cast_fp16_19 = split(axis = var_4771_axis_0, split_sizes = tile_52, x = var_4770_cast_fp16)[name = tensor<string, []>("op_4771_cast_fp16")];
+            tensor<int32, [20]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4792_axis_0 = const()[name = tensor<string, []>("op_4792_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16_19 = split(axis = var_4792_axis_0, split_sizes = tile_53, x = var_4746_cast_fp16)[name = tensor<string, []>("op_4792_cast_fp16")];
+            tensor<string, []> aw_681_equation_0 = const()[name = tensor<string, []>("aw_681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_681_cast_fp16 = einsum(equation = aw_681_equation_0, values = (var_4771_cast_fp16_0, var_4749_cast_fp16_0))[name = tensor<string, []>("aw_681_cast_fp16")];
+            tensor<string, []> aw_683_equation_0 = const()[name = tensor<string, []>("aw_683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_683_cast_fp16 = einsum(equation = aw_683_equation_0, values = (var_4771_cast_fp16_1, var_4749_cast_fp16_1))[name = tensor<string, []>("aw_683_cast_fp16")];
+            tensor<string, []> aw_685_equation_0 = const()[name = tensor<string, []>("aw_685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_685_cast_fp16 = einsum(equation = aw_685_equation_0, values = (var_4771_cast_fp16_2, var_4749_cast_fp16_2))[name = tensor<string, []>("aw_685_cast_fp16")];
+            tensor<string, []> aw_687_equation_0 = const()[name = tensor<string, []>("aw_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_687_cast_fp16 = einsum(equation = aw_687_equation_0, values = (var_4771_cast_fp16_3, var_4749_cast_fp16_3))[name = tensor<string, []>("aw_687_cast_fp16")];
+            tensor<string, []> aw_689_equation_0 = const()[name = tensor<string, []>("aw_689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_689_cast_fp16 = einsum(equation = aw_689_equation_0, values = (var_4771_cast_fp16_4, var_4749_cast_fp16_4))[name = tensor<string, []>("aw_689_cast_fp16")];
+            tensor<string, []> aw_691_equation_0 = const()[name = tensor<string, []>("aw_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_691_cast_fp16 = einsum(equation = aw_691_equation_0, values = (var_4771_cast_fp16_5, var_4749_cast_fp16_5))[name = tensor<string, []>("aw_691_cast_fp16")];
+            tensor<string, []> aw_693_equation_0 = const()[name = tensor<string, []>("aw_693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_693_cast_fp16 = einsum(equation = aw_693_equation_0, values = (var_4771_cast_fp16_6, var_4749_cast_fp16_6))[name = tensor<string, []>("aw_693_cast_fp16")];
+            tensor<string, []> aw_695_equation_0 = const()[name = tensor<string, []>("aw_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_695_cast_fp16 = einsum(equation = aw_695_equation_0, values = (var_4771_cast_fp16_7, var_4749_cast_fp16_7))[name = tensor<string, []>("aw_695_cast_fp16")];
+            tensor<string, []> aw_697_equation_0 = const()[name = tensor<string, []>("aw_697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_697_cast_fp16 = einsum(equation = aw_697_equation_0, values = (var_4771_cast_fp16_8, var_4749_cast_fp16_8))[name = tensor<string, []>("aw_697_cast_fp16")];
+            tensor<string, []> aw_699_equation_0 = const()[name = tensor<string, []>("aw_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_699_cast_fp16 = einsum(equation = aw_699_equation_0, values = (var_4771_cast_fp16_9, var_4749_cast_fp16_9))[name = tensor<string, []>("aw_699_cast_fp16")];
+            tensor<string, []> aw_701_equation_0 = const()[name = tensor<string, []>("aw_701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_701_cast_fp16 = einsum(equation = aw_701_equation_0, values = (var_4771_cast_fp16_10, var_4749_cast_fp16_10))[name = tensor<string, []>("aw_701_cast_fp16")];
+            tensor<string, []> aw_703_equation_0 = const()[name = tensor<string, []>("aw_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_703_cast_fp16 = einsum(equation = aw_703_equation_0, values = (var_4771_cast_fp16_11, var_4749_cast_fp16_11))[name = tensor<string, []>("aw_703_cast_fp16")];
+            tensor<string, []> aw_705_equation_0 = const()[name = tensor<string, []>("aw_705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_705_cast_fp16 = einsum(equation = aw_705_equation_0, values = (var_4771_cast_fp16_12, var_4749_cast_fp16_12))[name = tensor<string, []>("aw_705_cast_fp16")];
+            tensor<string, []> aw_707_equation_0 = const()[name = tensor<string, []>("aw_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_707_cast_fp16 = einsum(equation = aw_707_equation_0, values = (var_4771_cast_fp16_13, var_4749_cast_fp16_13))[name = tensor<string, []>("aw_707_cast_fp16")];
+            tensor<string, []> aw_709_equation_0 = const()[name = tensor<string, []>("aw_709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_709_cast_fp16 = einsum(equation = aw_709_equation_0, values = (var_4771_cast_fp16_14, var_4749_cast_fp16_14))[name = tensor<string, []>("aw_709_cast_fp16")];
+            tensor<string, []> aw_711_equation_0 = const()[name = tensor<string, []>("aw_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_711_cast_fp16 = einsum(equation = aw_711_equation_0, values = (var_4771_cast_fp16_15, var_4749_cast_fp16_15))[name = tensor<string, []>("aw_711_cast_fp16")];
+            tensor<string, []> aw_713_equation_0 = const()[name = tensor<string, []>("aw_713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_713_cast_fp16 = einsum(equation = aw_713_equation_0, values = (var_4771_cast_fp16_16, var_4749_cast_fp16_16))[name = tensor<string, []>("aw_713_cast_fp16")];
+            tensor<string, []> aw_715_equation_0 = const()[name = tensor<string, []>("aw_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_715_cast_fp16 = einsum(equation = aw_715_equation_0, values = (var_4771_cast_fp16_17, var_4749_cast_fp16_17))[name = tensor<string, []>("aw_715_cast_fp16")];
+            tensor<string, []> aw_717_equation_0 = const()[name = tensor<string, []>("aw_717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_717_cast_fp16 = einsum(equation = aw_717_equation_0, values = (var_4771_cast_fp16_18, var_4749_cast_fp16_18))[name = tensor<string, []>("aw_717_cast_fp16")];
+            tensor<string, []> aw_719_equation_0 = const()[name = tensor<string, []>("aw_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_719_cast_fp16 = einsum(equation = aw_719_equation_0, values = (var_4771_cast_fp16_19, var_4749_cast_fp16_19))[name = tensor<string, []>("aw_719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4853_cast_fp16 = softmax(axis = var_4697, x = aw_681_cast_fp16)[name = tensor<string, []>("op_4853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4854_cast_fp16 = softmax(axis = var_4697, x = aw_683_cast_fp16)[name = tensor<string, []>("op_4854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4855_cast_fp16 = softmax(axis = var_4697, x = aw_685_cast_fp16)[name = tensor<string, []>("op_4855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4856_cast_fp16 = softmax(axis = var_4697, x = aw_687_cast_fp16)[name = tensor<string, []>("op_4856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4857_cast_fp16 = softmax(axis = var_4697, x = aw_689_cast_fp16)[name = tensor<string, []>("op_4857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4858_cast_fp16 = softmax(axis = var_4697, x = aw_691_cast_fp16)[name = tensor<string, []>("op_4858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4859_cast_fp16 = softmax(axis = var_4697, x = aw_693_cast_fp16)[name = tensor<string, []>("op_4859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4860_cast_fp16 = softmax(axis = var_4697, x = aw_695_cast_fp16)[name = tensor<string, []>("op_4860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4861_cast_fp16 = softmax(axis = var_4697, x = aw_697_cast_fp16)[name = tensor<string, []>("op_4861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4862_cast_fp16 = softmax(axis = var_4697, x = aw_699_cast_fp16)[name = tensor<string, []>("op_4862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4863_cast_fp16 = softmax(axis = var_4697, x = aw_701_cast_fp16)[name = tensor<string, []>("op_4863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4864_cast_fp16 = softmax(axis = var_4697, x = aw_703_cast_fp16)[name = tensor<string, []>("op_4864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4865_cast_fp16 = softmax(axis = var_4697, x = aw_705_cast_fp16)[name = tensor<string, []>("op_4865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4866_cast_fp16 = softmax(axis = var_4697, x = aw_707_cast_fp16)[name = tensor<string, []>("op_4866_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4867_cast_fp16 = softmax(axis = var_4697, x = aw_709_cast_fp16)[name = tensor<string, []>("op_4867_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4868_cast_fp16 = softmax(axis = var_4697, x = aw_711_cast_fp16)[name = tensor<string, []>("op_4868_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4869_cast_fp16 = softmax(axis = var_4697, x = aw_713_cast_fp16)[name = tensor<string, []>("op_4869_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4870_cast_fp16 = softmax(axis = var_4697, x = aw_715_cast_fp16)[name = tensor<string, []>("op_4870_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4871_cast_fp16 = softmax(axis = var_4697, x = aw_717_cast_fp16)[name = tensor<string, []>("op_4871_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4872_cast_fp16 = softmax(axis = var_4697, x = aw_719_cast_fp16)[name = tensor<string, []>("op_4872_cast_fp16")];
+            tensor<string, []> var_4874_equation_0 = const()[name = tensor<string, []>("op_4874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4874_cast_fp16 = einsum(equation = var_4874_equation_0, values = (var_4792_cast_fp16_0, var_4853_cast_fp16))[name = tensor<string, []>("op_4874_cast_fp16")];
+            tensor<string, []> var_4876_equation_0 = const()[name = tensor<string, []>("op_4876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4876_cast_fp16 = einsum(equation = var_4876_equation_0, values = (var_4792_cast_fp16_1, var_4854_cast_fp16))[name = tensor<string, []>("op_4876_cast_fp16")];
+            tensor<string, []> var_4878_equation_0 = const()[name = tensor<string, []>("op_4878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4878_cast_fp16 = einsum(equation = var_4878_equation_0, values = (var_4792_cast_fp16_2, var_4855_cast_fp16))[name = tensor<string, []>("op_4878_cast_fp16")];
+            tensor<string, []> var_4880_equation_0 = const()[name = tensor<string, []>("op_4880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4880_cast_fp16 = einsum(equation = var_4880_equation_0, values = (var_4792_cast_fp16_3, var_4856_cast_fp16))[name = tensor<string, []>("op_4880_cast_fp16")];
+            tensor<string, []> var_4882_equation_0 = const()[name = tensor<string, []>("op_4882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4882_cast_fp16 = einsum(equation = var_4882_equation_0, values = (var_4792_cast_fp16_4, var_4857_cast_fp16))[name = tensor<string, []>("op_4882_cast_fp16")];
+            tensor<string, []> var_4884_equation_0 = const()[name = tensor<string, []>("op_4884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4884_cast_fp16 = einsum(equation = var_4884_equation_0, values = (var_4792_cast_fp16_5, var_4858_cast_fp16))[name = tensor<string, []>("op_4884_cast_fp16")];
+            tensor<string, []> var_4886_equation_0 = const()[name = tensor<string, []>("op_4886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4886_cast_fp16 = einsum(equation = var_4886_equation_0, values = (var_4792_cast_fp16_6, var_4859_cast_fp16))[name = tensor<string, []>("op_4886_cast_fp16")];
+            tensor<string, []> var_4888_equation_0 = const()[name = tensor<string, []>("op_4888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4888_cast_fp16 = einsum(equation = var_4888_equation_0, values = (var_4792_cast_fp16_7, var_4860_cast_fp16))[name = tensor<string, []>("op_4888_cast_fp16")];
+            tensor<string, []> var_4890_equation_0 = const()[name = tensor<string, []>("op_4890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4890_cast_fp16 = einsum(equation = var_4890_equation_0, values = (var_4792_cast_fp16_8, var_4861_cast_fp16))[name = tensor<string, []>("op_4890_cast_fp16")];
+            tensor<string, []> var_4892_equation_0 = const()[name = tensor<string, []>("op_4892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4892_cast_fp16 = einsum(equation = var_4892_equation_0, values = (var_4792_cast_fp16_9, var_4862_cast_fp16))[name = tensor<string, []>("op_4892_cast_fp16")];
+            tensor<string, []> var_4894_equation_0 = const()[name = tensor<string, []>("op_4894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4894_cast_fp16 = einsum(equation = var_4894_equation_0, values = (var_4792_cast_fp16_10, var_4863_cast_fp16))[name = tensor<string, []>("op_4894_cast_fp16")];
+            tensor<string, []> var_4896_equation_0 = const()[name = tensor<string, []>("op_4896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4896_cast_fp16 = einsum(equation = var_4896_equation_0, values = (var_4792_cast_fp16_11, var_4864_cast_fp16))[name = tensor<string, []>("op_4896_cast_fp16")];
+            tensor<string, []> var_4898_equation_0 = const()[name = tensor<string, []>("op_4898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4898_cast_fp16 = einsum(equation = var_4898_equation_0, values = (var_4792_cast_fp16_12, var_4865_cast_fp16))[name = tensor<string, []>("op_4898_cast_fp16")];
+            tensor<string, []> var_4900_equation_0 = const()[name = tensor<string, []>("op_4900_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16 = einsum(equation = var_4900_equation_0, values = (var_4792_cast_fp16_13, var_4866_cast_fp16))[name = tensor<string, []>("op_4900_cast_fp16")];
+            tensor<string, []> var_4902_equation_0 = const()[name = tensor<string, []>("op_4902_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4902_cast_fp16 = einsum(equation = var_4902_equation_0, values = (var_4792_cast_fp16_14, var_4867_cast_fp16))[name = tensor<string, []>("op_4902_cast_fp16")];
+            tensor<string, []> var_4904_equation_0 = const()[name = tensor<string, []>("op_4904_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4904_cast_fp16 = einsum(equation = var_4904_equation_0, values = (var_4792_cast_fp16_15, var_4868_cast_fp16))[name = tensor<string, []>("op_4904_cast_fp16")];
+            tensor<string, []> var_4906_equation_0 = const()[name = tensor<string, []>("op_4906_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4906_cast_fp16 = einsum(equation = var_4906_equation_0, values = (var_4792_cast_fp16_16, var_4869_cast_fp16))[name = tensor<string, []>("op_4906_cast_fp16")];
+            tensor<string, []> var_4908_equation_0 = const()[name = tensor<string, []>("op_4908_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4908_cast_fp16 = einsum(equation = var_4908_equation_0, values = (var_4792_cast_fp16_17, var_4870_cast_fp16))[name = tensor<string, []>("op_4908_cast_fp16")];
+            tensor<string, []> var_4910_equation_0 = const()[name = tensor<string, []>("op_4910_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4910_cast_fp16 = einsum(equation = var_4910_equation_0, values = (var_4792_cast_fp16_18, var_4871_cast_fp16))[name = tensor<string, []>("op_4910_cast_fp16")];
+            tensor<string, []> var_4912_equation_0 = const()[name = tensor<string, []>("op_4912_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4912_cast_fp16 = einsum(equation = var_4912_equation_0, values = (var_4792_cast_fp16_19, var_4872_cast_fp16))[name = tensor<string, []>("op_4912_cast_fp16")];
+            tensor<bool, []> input_175_interleave_0 = const()[name = tensor<string, []>("input_175_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_175_cast_fp16 = concat(axis = var_4697, interleave = input_175_interleave_0, values = (var_4874_cast_fp16, var_4876_cast_fp16, var_4878_cast_fp16, var_4880_cast_fp16, var_4882_cast_fp16, var_4884_cast_fp16, var_4886_cast_fp16, var_4888_cast_fp16, var_4890_cast_fp16, var_4892_cast_fp16, var_4894_cast_fp16, var_4896_cast_fp16, var_4898_cast_fp16, var_4900_cast_fp16, var_4902_cast_fp16, var_4904_cast_fp16, var_4906_cast_fp16, var_4908_cast_fp16, var_4910_cast_fp16, var_4912_cast_fp16))[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<string, []> var_4921_pad_type_0 = const()[name = tensor<string, []>("op_4921_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4921_strides_0 = const()[name = tensor<string, []>("op_4921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4921_pad_0 = const()[name = tensor<string, []>("op_4921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4921_dilations_0 = const()[name = tensor<string, []>("op_4921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4921_groups_0 = const()[name = tensor<string, []>("op_4921_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_17_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(693505792)))];
+            tensor<fp16, [1280]> blocks_17_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696782656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4921_cast_fp16 = conv(bias = blocks_17_attn_out_bias_to_fp16, dilations = var_4921_dilations_0, groups = var_4921_groups_0, pad = var_4921_pad_0, pad_type = var_4921_pad_type_0, strides = var_4921_strides_0, weight = blocks_17_attn_out_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("op_4921_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = var_4921_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_177_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_177_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696785280)))];
+            tensor<fp16, [1280]> input_177_beta_0_to_fp16 = const()[name = tensor<string, []>("input_177_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696787904)))];
+            tensor<fp16, []> var_4931_to_fp16 = const()[name = tensor<string, []>("op_4931_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = input_177_beta_0_to_fp16, epsilon = var_4931_to_fp16, gamma = input_177_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_17_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(696790528)))];
+            tensor<fp16, [5120]> blocks_17_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709897792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_179_cast_fp16 = conv(bias = blocks_17_mlp_0_bias_to_fp16, dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = blocks_17_mlp_0_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<string, []> var_4957_pad_type_0 = const()[name = tensor<string, []>("op_4957_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4957_strides_0 = const()[name = tensor<string, []>("op_4957_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4957_pad_0 = const()[name = tensor<string, []>("op_4957_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4957_dilations_0 = const()[name = tensor<string, []>("op_4957_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4957_groups_0 = const()[name = tensor<string, []>("op_4957_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_17_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(709908096)))];
+            tensor<fp16, [1280]> blocks_17_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723015360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4957_cast_fp16 = conv(bias = blocks_17_mlp_2_bias_to_fp16, dilations = var_4957_dilations_0, groups = var_4957_groups_0, pad = var_4957_pad_0, pad_type = var_4957_pad_type_0, strides = var_4957_strides_0, weight = blocks_17_mlp_2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("op_4957_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_4957_cast_fp16)[name = tensor<string, []>("inputs_73_cast_fp16")];
+            tensor<int32, []> var_4966 = const()[name = tensor<string, []>("op_4966"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_183_axes_0 = const()[name = tensor<string, []>("input_183_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_183_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_183_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723017984)))];
+            tensor<fp16, [1280]> input_183_beta_0_to_fp16 = const()[name = tensor<string, []>("input_183_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723020608)))];
+            tensor<fp16, []> var_4982_to_fp16 = const()[name = tensor<string, []>("op_4982_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = input_183_beta_0_to_fp16, epsilon = var_4982_to_fp16, gamma = input_183_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<string, []> q_37_pad_type_0 = const()[name = tensor<string, []>("q_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_37_strides_0 = const()[name = tensor<string, []>("q_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_37_pad_0 = const()[name = tensor<string, []>("q_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_37_dilations_0 = const()[name = tensor<string, []>("q_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_37_groups_0 = const()[name = tensor<string, []>("q_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5017_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5017_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(723023232)))];
+            tensor<fp16, [1280]> var_5017_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5017_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(726300096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5017_cast_fp16 = conv(bias = var_5017_bias_0_to_fp16, dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = var_5017_weight_0_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<string, []> k_37_pad_type_0 = const()[name = tensor<string, []>("k_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_37_strides_0 = const()[name = tensor<string, []>("k_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_37_pad_0 = const()[name = tensor<string, []>("k_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_37_dilations_0 = const()[name = tensor<string, []>("k_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_37_groups_0 = const()[name = tensor<string, []>("k_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(726302720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_37_cast_fp16 = conv(dilations = k_37_dilations_0, groups = k_37_groups_0, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = k_37_strides_0, weight = blocks_18_attn_key_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
+            tensor<string, []> var_5015_pad_type_0 = const()[name = tensor<string, []>("op_5015_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5015_strides_0 = const()[name = tensor<string, []>("op_5015_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5015_pad_0 = const()[name = tensor<string, []>("op_5015_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5015_dilations_0 = const()[name = tensor<string, []>("op_5015_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5015_groups_0 = const()[name = tensor<string, []>("op_5015_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(729579584)))];
+            tensor<fp16, [1280]> blocks_18_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732856448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5015_cast_fp16 = conv(bias = blocks_18_attn_value_bias_to_fp16, dilations = var_5015_dilations_0, groups = var_5015_groups_0, pad = var_5015_pad_0, pad_type = var_5015_pad_type_0, strides = var_5015_strides_0, weight = blocks_18_attn_value_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_5015_cast_fp16")];
+            tensor<int32, [20]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5018_axis_0 = const()[name = tensor<string, []>("op_5018_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5018_cast_fp16_19 = split(axis = var_5018_axis_0, split_sizes = tile_54, x = var_5017_cast_fp16)[name = tensor<string, []>("op_5018_cast_fp16")];
+            tensor<int32, [4]> var_5039_perm_0 = const()[name = tensor<string, []>("op_5039_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_55 = const()[name = tensor<string, []>("tile_55"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5040_axis_0 = const()[name = tensor<string, []>("op_5040_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5039_cast_fp16 = transpose(perm = var_5039_perm_0, x = k_37_cast_fp16)[name = tensor<string, []>("transpose_14")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5040_cast_fp16_19 = split(axis = var_5040_axis_0, split_sizes = tile_55, x = var_5039_cast_fp16)[name = tensor<string, []>("op_5040_cast_fp16")];
+            tensor<int32, [20]> tile_56 = const()[name = tensor<string, []>("tile_56"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5061_axis_0 = const()[name = tensor<string, []>("op_5061_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5061_cast_fp16_19 = split(axis = var_5061_axis_0, split_sizes = tile_56, x = var_5015_cast_fp16)[name = tensor<string, []>("op_5061_cast_fp16")];
+            tensor<string, []> aw_721_equation_0 = const()[name = tensor<string, []>("aw_721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_721_cast_fp16 = einsum(equation = aw_721_equation_0, values = (var_5040_cast_fp16_0, var_5018_cast_fp16_0))[name = tensor<string, []>("aw_721_cast_fp16")];
+            tensor<string, []> aw_723_equation_0 = const()[name = tensor<string, []>("aw_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_723_cast_fp16 = einsum(equation = aw_723_equation_0, values = (var_5040_cast_fp16_1, var_5018_cast_fp16_1))[name = tensor<string, []>("aw_723_cast_fp16")];
+            tensor<string, []> aw_725_equation_0 = const()[name = tensor<string, []>("aw_725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_725_cast_fp16 = einsum(equation = aw_725_equation_0, values = (var_5040_cast_fp16_2, var_5018_cast_fp16_2))[name = tensor<string, []>("aw_725_cast_fp16")];
+            tensor<string, []> aw_727_equation_0 = const()[name = tensor<string, []>("aw_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_727_cast_fp16 = einsum(equation = aw_727_equation_0, values = (var_5040_cast_fp16_3, var_5018_cast_fp16_3))[name = tensor<string, []>("aw_727_cast_fp16")];
+            tensor<string, []> aw_729_equation_0 = const()[name = tensor<string, []>("aw_729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_729_cast_fp16 = einsum(equation = aw_729_equation_0, values = (var_5040_cast_fp16_4, var_5018_cast_fp16_4))[name = tensor<string, []>("aw_729_cast_fp16")];
+            tensor<string, []> aw_731_equation_0 = const()[name = tensor<string, []>("aw_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_731_cast_fp16 = einsum(equation = aw_731_equation_0, values = (var_5040_cast_fp16_5, var_5018_cast_fp16_5))[name = tensor<string, []>("aw_731_cast_fp16")];
+            tensor<string, []> aw_733_equation_0 = const()[name = tensor<string, []>("aw_733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_733_cast_fp16 = einsum(equation = aw_733_equation_0, values = (var_5040_cast_fp16_6, var_5018_cast_fp16_6))[name = tensor<string, []>("aw_733_cast_fp16")];
+            tensor<string, []> aw_735_equation_0 = const()[name = tensor<string, []>("aw_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_735_cast_fp16 = einsum(equation = aw_735_equation_0, values = (var_5040_cast_fp16_7, var_5018_cast_fp16_7))[name = tensor<string, []>("aw_735_cast_fp16")];
+            tensor<string, []> aw_737_equation_0 = const()[name = tensor<string, []>("aw_737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_737_cast_fp16 = einsum(equation = aw_737_equation_0, values = (var_5040_cast_fp16_8, var_5018_cast_fp16_8))[name = tensor<string, []>("aw_737_cast_fp16")];
+            tensor<string, []> aw_739_equation_0 = const()[name = tensor<string, []>("aw_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_739_cast_fp16 = einsum(equation = aw_739_equation_0, values = (var_5040_cast_fp16_9, var_5018_cast_fp16_9))[name = tensor<string, []>("aw_739_cast_fp16")];
+            tensor<string, []> aw_741_equation_0 = const()[name = tensor<string, []>("aw_741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_741_cast_fp16 = einsum(equation = aw_741_equation_0, values = (var_5040_cast_fp16_10, var_5018_cast_fp16_10))[name = tensor<string, []>("aw_741_cast_fp16")];
+            tensor<string, []> aw_743_equation_0 = const()[name = tensor<string, []>("aw_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_743_cast_fp16 = einsum(equation = aw_743_equation_0, values = (var_5040_cast_fp16_11, var_5018_cast_fp16_11))[name = tensor<string, []>("aw_743_cast_fp16")];
+            tensor<string, []> aw_745_equation_0 = const()[name = tensor<string, []>("aw_745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_745_cast_fp16 = einsum(equation = aw_745_equation_0, values = (var_5040_cast_fp16_12, var_5018_cast_fp16_12))[name = tensor<string, []>("aw_745_cast_fp16")];
+            tensor<string, []> aw_747_equation_0 = const()[name = tensor<string, []>("aw_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_747_cast_fp16 = einsum(equation = aw_747_equation_0, values = (var_5040_cast_fp16_13, var_5018_cast_fp16_13))[name = tensor<string, []>("aw_747_cast_fp16")];
+            tensor<string, []> aw_749_equation_0 = const()[name = tensor<string, []>("aw_749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_749_cast_fp16 = einsum(equation = aw_749_equation_0, values = (var_5040_cast_fp16_14, var_5018_cast_fp16_14))[name = tensor<string, []>("aw_749_cast_fp16")];
+            tensor<string, []> aw_751_equation_0 = const()[name = tensor<string, []>("aw_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_751_cast_fp16 = einsum(equation = aw_751_equation_0, values = (var_5040_cast_fp16_15, var_5018_cast_fp16_15))[name = tensor<string, []>("aw_751_cast_fp16")];
+            tensor<string, []> aw_753_equation_0 = const()[name = tensor<string, []>("aw_753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_753_cast_fp16 = einsum(equation = aw_753_equation_0, values = (var_5040_cast_fp16_16, var_5018_cast_fp16_16))[name = tensor<string, []>("aw_753_cast_fp16")];
+            tensor<string, []> aw_755_equation_0 = const()[name = tensor<string, []>("aw_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_755_cast_fp16 = einsum(equation = aw_755_equation_0, values = (var_5040_cast_fp16_17, var_5018_cast_fp16_17))[name = tensor<string, []>("aw_755_cast_fp16")];
+            tensor<string, []> aw_757_equation_0 = const()[name = tensor<string, []>("aw_757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_757_cast_fp16 = einsum(equation = aw_757_equation_0, values = (var_5040_cast_fp16_18, var_5018_cast_fp16_18))[name = tensor<string, []>("aw_757_cast_fp16")];
+            tensor<string, []> aw_759_equation_0 = const()[name = tensor<string, []>("aw_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_759_cast_fp16 = einsum(equation = aw_759_equation_0, values = (var_5040_cast_fp16_19, var_5018_cast_fp16_19))[name = tensor<string, []>("aw_759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5122_cast_fp16 = softmax(axis = var_4966, x = aw_721_cast_fp16)[name = tensor<string, []>("op_5122_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5123_cast_fp16 = softmax(axis = var_4966, x = aw_723_cast_fp16)[name = tensor<string, []>("op_5123_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5124_cast_fp16 = softmax(axis = var_4966, x = aw_725_cast_fp16)[name = tensor<string, []>("op_5124_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5125_cast_fp16 = softmax(axis = var_4966, x = aw_727_cast_fp16)[name = tensor<string, []>("op_5125_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5126_cast_fp16 = softmax(axis = var_4966, x = aw_729_cast_fp16)[name = tensor<string, []>("op_5126_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5127_cast_fp16 = softmax(axis = var_4966, x = aw_731_cast_fp16)[name = tensor<string, []>("op_5127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5128_cast_fp16 = softmax(axis = var_4966, x = aw_733_cast_fp16)[name = tensor<string, []>("op_5128_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5129_cast_fp16 = softmax(axis = var_4966, x = aw_735_cast_fp16)[name = tensor<string, []>("op_5129_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5130_cast_fp16 = softmax(axis = var_4966, x = aw_737_cast_fp16)[name = tensor<string, []>("op_5130_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5131_cast_fp16 = softmax(axis = var_4966, x = aw_739_cast_fp16)[name = tensor<string, []>("op_5131_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5132_cast_fp16 = softmax(axis = var_4966, x = aw_741_cast_fp16)[name = tensor<string, []>("op_5132_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5133_cast_fp16 = softmax(axis = var_4966, x = aw_743_cast_fp16)[name = tensor<string, []>("op_5133_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5134_cast_fp16 = softmax(axis = var_4966, x = aw_745_cast_fp16)[name = tensor<string, []>("op_5134_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5135_cast_fp16 = softmax(axis = var_4966, x = aw_747_cast_fp16)[name = tensor<string, []>("op_5135_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5136_cast_fp16 = softmax(axis = var_4966, x = aw_749_cast_fp16)[name = tensor<string, []>("op_5136_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5137_cast_fp16 = softmax(axis = var_4966, x = aw_751_cast_fp16)[name = tensor<string, []>("op_5137_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5138_cast_fp16 = softmax(axis = var_4966, x = aw_753_cast_fp16)[name = tensor<string, []>("op_5138_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5139_cast_fp16 = softmax(axis = var_4966, x = aw_755_cast_fp16)[name = tensor<string, []>("op_5139_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5140_cast_fp16 = softmax(axis = var_4966, x = aw_757_cast_fp16)[name = tensor<string, []>("op_5140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5141_cast_fp16 = softmax(axis = var_4966, x = aw_759_cast_fp16)[name = tensor<string, []>("op_5141_cast_fp16")];
+            tensor<string, []> var_5143_equation_0 = const()[name = tensor<string, []>("op_5143_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5143_cast_fp16 = einsum(equation = var_5143_equation_0, values = (var_5061_cast_fp16_0, var_5122_cast_fp16))[name = tensor<string, []>("op_5143_cast_fp16")];
+            tensor<string, []> var_5145_equation_0 = const()[name = tensor<string, []>("op_5145_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5145_cast_fp16 = einsum(equation = var_5145_equation_0, values = (var_5061_cast_fp16_1, var_5123_cast_fp16))[name = tensor<string, []>("op_5145_cast_fp16")];
+            tensor<string, []> var_5147_equation_0 = const()[name = tensor<string, []>("op_5147_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5147_cast_fp16 = einsum(equation = var_5147_equation_0, values = (var_5061_cast_fp16_2, var_5124_cast_fp16))[name = tensor<string, []>("op_5147_cast_fp16")];
+            tensor<string, []> var_5149_equation_0 = const()[name = tensor<string, []>("op_5149_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5149_cast_fp16 = einsum(equation = var_5149_equation_0, values = (var_5061_cast_fp16_3, var_5125_cast_fp16))[name = tensor<string, []>("op_5149_cast_fp16")];
+            tensor<string, []> var_5151_equation_0 = const()[name = tensor<string, []>("op_5151_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5151_cast_fp16 = einsum(equation = var_5151_equation_0, values = (var_5061_cast_fp16_4, var_5126_cast_fp16))[name = tensor<string, []>("op_5151_cast_fp16")];
+            tensor<string, []> var_5153_equation_0 = const()[name = tensor<string, []>("op_5153_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5153_cast_fp16 = einsum(equation = var_5153_equation_0, values = (var_5061_cast_fp16_5, var_5127_cast_fp16))[name = tensor<string, []>("op_5153_cast_fp16")];
+            tensor<string, []> var_5155_equation_0 = const()[name = tensor<string, []>("op_5155_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5155_cast_fp16 = einsum(equation = var_5155_equation_0, values = (var_5061_cast_fp16_6, var_5128_cast_fp16))[name = tensor<string, []>("op_5155_cast_fp16")];
+            tensor<string, []> var_5157_equation_0 = const()[name = tensor<string, []>("op_5157_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5157_cast_fp16 = einsum(equation = var_5157_equation_0, values = (var_5061_cast_fp16_7, var_5129_cast_fp16))[name = tensor<string, []>("op_5157_cast_fp16")];
+            tensor<string, []> var_5159_equation_0 = const()[name = tensor<string, []>("op_5159_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5159_cast_fp16 = einsum(equation = var_5159_equation_0, values = (var_5061_cast_fp16_8, var_5130_cast_fp16))[name = tensor<string, []>("op_5159_cast_fp16")];
+            tensor<string, []> var_5161_equation_0 = const()[name = tensor<string, []>("op_5161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5161_cast_fp16 = einsum(equation = var_5161_equation_0, values = (var_5061_cast_fp16_9, var_5131_cast_fp16))[name = tensor<string, []>("op_5161_cast_fp16")];
+            tensor<string, []> var_5163_equation_0 = const()[name = tensor<string, []>("op_5163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5163_cast_fp16 = einsum(equation = var_5163_equation_0, values = (var_5061_cast_fp16_10, var_5132_cast_fp16))[name = tensor<string, []>("op_5163_cast_fp16")];
+            tensor<string, []> var_5165_equation_0 = const()[name = tensor<string, []>("op_5165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5165_cast_fp16 = einsum(equation = var_5165_equation_0, values = (var_5061_cast_fp16_11, var_5133_cast_fp16))[name = tensor<string, []>("op_5165_cast_fp16")];
+            tensor<string, []> var_5167_equation_0 = const()[name = tensor<string, []>("op_5167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5167_cast_fp16 = einsum(equation = var_5167_equation_0, values = (var_5061_cast_fp16_12, var_5134_cast_fp16))[name = tensor<string, []>("op_5167_cast_fp16")];
+            tensor<string, []> var_5169_equation_0 = const()[name = tensor<string, []>("op_5169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5169_cast_fp16 = einsum(equation = var_5169_equation_0, values = (var_5061_cast_fp16_13, var_5135_cast_fp16))[name = tensor<string, []>("op_5169_cast_fp16")];
+            tensor<string, []> var_5171_equation_0 = const()[name = tensor<string, []>("op_5171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5171_cast_fp16 = einsum(equation = var_5171_equation_0, values = (var_5061_cast_fp16_14, var_5136_cast_fp16))[name = tensor<string, []>("op_5171_cast_fp16")];
+            tensor<string, []> var_5173_equation_0 = const()[name = tensor<string, []>("op_5173_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5173_cast_fp16 = einsum(equation = var_5173_equation_0, values = (var_5061_cast_fp16_15, var_5137_cast_fp16))[name = tensor<string, []>("op_5173_cast_fp16")];
+            tensor<string, []> var_5175_equation_0 = const()[name = tensor<string, []>("op_5175_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5175_cast_fp16 = einsum(equation = var_5175_equation_0, values = (var_5061_cast_fp16_16, var_5138_cast_fp16))[name = tensor<string, []>("op_5175_cast_fp16")];
+            tensor<string, []> var_5177_equation_0 = const()[name = tensor<string, []>("op_5177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5177_cast_fp16 = einsum(equation = var_5177_equation_0, values = (var_5061_cast_fp16_17, var_5139_cast_fp16))[name = tensor<string, []>("op_5177_cast_fp16")];
+            tensor<string, []> var_5179_equation_0 = const()[name = tensor<string, []>("op_5179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5179_cast_fp16 = einsum(equation = var_5179_equation_0, values = (var_5061_cast_fp16_18, var_5140_cast_fp16))[name = tensor<string, []>("op_5179_cast_fp16")];
+            tensor<string, []> var_5181_equation_0 = const()[name = tensor<string, []>("op_5181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5181_cast_fp16 = einsum(equation = var_5181_equation_0, values = (var_5061_cast_fp16_19, var_5141_cast_fp16))[name = tensor<string, []>("op_5181_cast_fp16")];
+            tensor<bool, []> input_185_interleave_0 = const()[name = tensor<string, []>("input_185_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_185_cast_fp16 = concat(axis = var_4966, interleave = input_185_interleave_0, values = (var_5143_cast_fp16, var_5145_cast_fp16, var_5147_cast_fp16, var_5149_cast_fp16, var_5151_cast_fp16, var_5153_cast_fp16, var_5155_cast_fp16, var_5157_cast_fp16, var_5159_cast_fp16, var_5161_cast_fp16, var_5163_cast_fp16, var_5165_cast_fp16, var_5167_cast_fp16, var_5169_cast_fp16, var_5171_cast_fp16, var_5173_cast_fp16, var_5175_cast_fp16, var_5177_cast_fp16, var_5179_cast_fp16, var_5181_cast_fp16))[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<string, []> var_5190_pad_type_0 = const()[name = tensor<string, []>("op_5190_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5190_strides_0 = const()[name = tensor<string, []>("op_5190_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5190_pad_0 = const()[name = tensor<string, []>("op_5190_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5190_dilations_0 = const()[name = tensor<string, []>("op_5190_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5190_groups_0 = const()[name = tensor<string, []>("op_5190_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_18_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(732859072)))];
+            tensor<fp16, [1280]> blocks_18_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(736135936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5190_cast_fp16 = conv(bias = blocks_18_attn_out_bias_to_fp16, dilations = var_5190_dilations_0, groups = var_5190_groups_0, pad = var_5190_pad_0, pad_type = var_5190_pad_type_0, strides = var_5190_strides_0, weight = blocks_18_attn_out_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("op_5190_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = var_5190_cast_fp16)[name = tensor<string, []>("inputs_75_cast_fp16")];
+            tensor<int32, [1]> input_187_axes_0 = const()[name = tensor<string, []>("input_187_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_187_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(736138560)))];
+            tensor<fp16, [1280]> input_187_beta_0_to_fp16 = const()[name = tensor<string, []>("input_187_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(736141184)))];
+            tensor<fp16, []> var_5200_to_fp16 = const()[name = tensor<string, []>("op_5200_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_187_cast_fp16 = layer_norm(axes = input_187_axes_0, beta = input_187_beta_0_to_fp16, epsilon = var_5200_to_fp16, gamma = input_187_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
+            tensor<string, []> input_189_pad_type_0 = const()[name = tensor<string, []>("input_189_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_189_strides_0 = const()[name = tensor<string, []>("input_189_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_189_pad_0 = const()[name = tensor<string, []>("input_189_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_189_dilations_0 = const()[name = tensor<string, []>("input_189_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_189_groups_0 = const()[name = tensor<string, []>("input_189_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_18_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(736143808)))];
+            tensor<fp16, [5120]> blocks_18_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(749251072)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_189_cast_fp16 = conv(bias = blocks_18_mlp_0_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = blocks_18_mlp_0_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
+            tensor<string, []> var_5226_pad_type_0 = const()[name = tensor<string, []>("op_5226_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5226_strides_0 = const()[name = tensor<string, []>("op_5226_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5226_pad_0 = const()[name = tensor<string, []>("op_5226_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5226_dilations_0 = const()[name = tensor<string, []>("op_5226_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5226_groups_0 = const()[name = tensor<string, []>("op_5226_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_18_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(749261376)))];
+            tensor<fp16, [1280]> blocks_18_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762368640)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5226_cast_fp16 = conv(bias = blocks_18_mlp_2_bias_to_fp16, dilations = var_5226_dilations_0, groups = var_5226_groups_0, pad = var_5226_pad_0, pad_type = var_5226_pad_type_0, strides = var_5226_strides_0, weight = blocks_18_mlp_2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("op_5226_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = var_5226_cast_fp16)[name = tensor<string, []>("inputs_77_cast_fp16")];
+            tensor<int32, []> var_5235 = const()[name = tensor<string, []>("op_5235"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_193_axes_0 = const()[name = tensor<string, []>("input_193_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_193_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_193_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762371264)))];
+            tensor<fp16, [1280]> input_193_beta_0_to_fp16 = const()[name = tensor<string, []>("input_193_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762373888)))];
+            tensor<fp16, []> var_5251_to_fp16 = const()[name = tensor<string, []>("op_5251_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_193_cast_fp16 = layer_norm(axes = input_193_axes_0, beta = input_193_beta_0_to_fp16, epsilon = var_5251_to_fp16, gamma = input_193_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
+            tensor<string, []> q_39_pad_type_0 = const()[name = tensor<string, []>("q_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_39_strides_0 = const()[name = tensor<string, []>("q_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_39_pad_0 = const()[name = tensor<string, []>("q_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_39_dilations_0 = const()[name = tensor<string, []>("q_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_39_groups_0 = const()[name = tensor<string, []>("q_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5286_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5286_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(762376512)))];
+            tensor<fp16, [1280]> var_5286_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5286_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765653376)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5286_cast_fp16 = conv(bias = var_5286_bias_0_to_fp16, dilations = q_39_dilations_0, groups = q_39_groups_0, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = q_39_strides_0, weight = var_5286_weight_0_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5286_cast_fp16")];
+            tensor<string, []> k_39_pad_type_0 = const()[name = tensor<string, []>("k_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_39_strides_0 = const()[name = tensor<string, []>("k_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_39_pad_0 = const()[name = tensor<string, []>("k_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_39_dilations_0 = const()[name = tensor<string, []>("k_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_39_groups_0 = const()[name = tensor<string, []>("k_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(765656000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_39_cast_fp16 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = blocks_19_attn_key_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("k_39_cast_fp16")];
+            tensor<string, []> var_5284_pad_type_0 = const()[name = tensor<string, []>("op_5284_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5284_strides_0 = const()[name = tensor<string, []>("op_5284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5284_pad_0 = const()[name = tensor<string, []>("op_5284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5284_dilations_0 = const()[name = tensor<string, []>("op_5284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5284_groups_0 = const()[name = tensor<string, []>("op_5284_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(768932864)))];
+            tensor<fp16, [1280]> blocks_19_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(772209728)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5284_cast_fp16 = conv(bias = blocks_19_attn_value_bias_to_fp16, dilations = var_5284_dilations_0, groups = var_5284_groups_0, pad = var_5284_pad_0, pad_type = var_5284_pad_type_0, strides = var_5284_strides_0, weight = blocks_19_attn_value_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_5284_cast_fp16")];
+            tensor<int32, [20]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5287_axis_0 = const()[name = tensor<string, []>("op_5287_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5287_cast_fp16_19 = split(axis = var_5287_axis_0, split_sizes = tile_57, x = var_5286_cast_fp16)[name = tensor<string, []>("op_5287_cast_fp16")];
+            tensor<int32, [4]> var_5308_perm_0 = const()[name = tensor<string, []>("op_5308_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5309_axis_0 = const()[name = tensor<string, []>("op_5309_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5308_cast_fp16 = transpose(perm = var_5308_perm_0, x = k_39_cast_fp16)[name = tensor<string, []>("transpose_13")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5309_cast_fp16_19 = split(axis = var_5309_axis_0, split_sizes = tile_58, x = var_5308_cast_fp16)[name = tensor<string, []>("op_5309_cast_fp16")];
+            tensor<int32, [20]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5330_axis_0 = const()[name = tensor<string, []>("op_5330_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5330_cast_fp16_19 = split(axis = var_5330_axis_0, split_sizes = tile_59, x = var_5284_cast_fp16)[name = tensor<string, []>("op_5330_cast_fp16")];
+            tensor<string, []> aw_761_equation_0 = const()[name = tensor<string, []>("aw_761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_761_cast_fp16 = einsum(equation = aw_761_equation_0, values = (var_5309_cast_fp16_0, var_5287_cast_fp16_0))[name = tensor<string, []>("aw_761_cast_fp16")];
+            tensor<string, []> aw_763_equation_0 = const()[name = tensor<string, []>("aw_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_763_cast_fp16 = einsum(equation = aw_763_equation_0, values = (var_5309_cast_fp16_1, var_5287_cast_fp16_1))[name = tensor<string, []>("aw_763_cast_fp16")];
+            tensor<string, []> aw_765_equation_0 = const()[name = tensor<string, []>("aw_765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_765_cast_fp16 = einsum(equation = aw_765_equation_0, values = (var_5309_cast_fp16_2, var_5287_cast_fp16_2))[name = tensor<string, []>("aw_765_cast_fp16")];
+            tensor<string, []> aw_767_equation_0 = const()[name = tensor<string, []>("aw_767_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_767_cast_fp16 = einsum(equation = aw_767_equation_0, values = (var_5309_cast_fp16_3, var_5287_cast_fp16_3))[name = tensor<string, []>("aw_767_cast_fp16")];
+            tensor<string, []> aw_769_equation_0 = const()[name = tensor<string, []>("aw_769_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_769_cast_fp16 = einsum(equation = aw_769_equation_0, values = (var_5309_cast_fp16_4, var_5287_cast_fp16_4))[name = tensor<string, []>("aw_769_cast_fp16")];
+            tensor<string, []> aw_771_equation_0 = const()[name = tensor<string, []>("aw_771_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_771_cast_fp16 = einsum(equation = aw_771_equation_0, values = (var_5309_cast_fp16_5, var_5287_cast_fp16_5))[name = tensor<string, []>("aw_771_cast_fp16")];
+            tensor<string, []> aw_773_equation_0 = const()[name = tensor<string, []>("aw_773_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_773_cast_fp16 = einsum(equation = aw_773_equation_0, values = (var_5309_cast_fp16_6, var_5287_cast_fp16_6))[name = tensor<string, []>("aw_773_cast_fp16")];
+            tensor<string, []> aw_775_equation_0 = const()[name = tensor<string, []>("aw_775_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_775_cast_fp16 = einsum(equation = aw_775_equation_0, values = (var_5309_cast_fp16_7, var_5287_cast_fp16_7))[name = tensor<string, []>("aw_775_cast_fp16")];
+            tensor<string, []> aw_777_equation_0 = const()[name = tensor<string, []>("aw_777_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_777_cast_fp16 = einsum(equation = aw_777_equation_0, values = (var_5309_cast_fp16_8, var_5287_cast_fp16_8))[name = tensor<string, []>("aw_777_cast_fp16")];
+            tensor<string, []> aw_779_equation_0 = const()[name = tensor<string, []>("aw_779_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_779_cast_fp16 = einsum(equation = aw_779_equation_0, values = (var_5309_cast_fp16_9, var_5287_cast_fp16_9))[name = tensor<string, []>("aw_779_cast_fp16")];
+            tensor<string, []> aw_781_equation_0 = const()[name = tensor<string, []>("aw_781_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_781_cast_fp16 = einsum(equation = aw_781_equation_0, values = (var_5309_cast_fp16_10, var_5287_cast_fp16_10))[name = tensor<string, []>("aw_781_cast_fp16")];
+            tensor<string, []> aw_783_equation_0 = const()[name = tensor<string, []>("aw_783_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_783_cast_fp16 = einsum(equation = aw_783_equation_0, values = (var_5309_cast_fp16_11, var_5287_cast_fp16_11))[name = tensor<string, []>("aw_783_cast_fp16")];
+            tensor<string, []> aw_785_equation_0 = const()[name = tensor<string, []>("aw_785_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_785_cast_fp16 = einsum(equation = aw_785_equation_0, values = (var_5309_cast_fp16_12, var_5287_cast_fp16_12))[name = tensor<string, []>("aw_785_cast_fp16")];
+            tensor<string, []> aw_787_equation_0 = const()[name = tensor<string, []>("aw_787_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_787_cast_fp16 = einsum(equation = aw_787_equation_0, values = (var_5309_cast_fp16_13, var_5287_cast_fp16_13))[name = tensor<string, []>("aw_787_cast_fp16")];
+            tensor<string, []> aw_789_equation_0 = const()[name = tensor<string, []>("aw_789_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_789_cast_fp16 = einsum(equation = aw_789_equation_0, values = (var_5309_cast_fp16_14, var_5287_cast_fp16_14))[name = tensor<string, []>("aw_789_cast_fp16")];
+            tensor<string, []> aw_791_equation_0 = const()[name = tensor<string, []>("aw_791_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_791_cast_fp16 = einsum(equation = aw_791_equation_0, values = (var_5309_cast_fp16_15, var_5287_cast_fp16_15))[name = tensor<string, []>("aw_791_cast_fp16")];
+            tensor<string, []> aw_793_equation_0 = const()[name = tensor<string, []>("aw_793_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_793_cast_fp16 = einsum(equation = aw_793_equation_0, values = (var_5309_cast_fp16_16, var_5287_cast_fp16_16))[name = tensor<string, []>("aw_793_cast_fp16")];
+            tensor<string, []> aw_795_equation_0 = const()[name = tensor<string, []>("aw_795_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_795_cast_fp16 = einsum(equation = aw_795_equation_0, values = (var_5309_cast_fp16_17, var_5287_cast_fp16_17))[name = tensor<string, []>("aw_795_cast_fp16")];
+            tensor<string, []> aw_797_equation_0 = const()[name = tensor<string, []>("aw_797_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_797_cast_fp16 = einsum(equation = aw_797_equation_0, values = (var_5309_cast_fp16_18, var_5287_cast_fp16_18))[name = tensor<string, []>("aw_797_cast_fp16")];
+            tensor<string, []> aw_799_equation_0 = const()[name = tensor<string, []>("aw_799_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_799_cast_fp16 = einsum(equation = aw_799_equation_0, values = (var_5309_cast_fp16_19, var_5287_cast_fp16_19))[name = tensor<string, []>("aw_799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5391_cast_fp16 = softmax(axis = var_5235, x = aw_761_cast_fp16)[name = tensor<string, []>("op_5391_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5392_cast_fp16 = softmax(axis = var_5235, x = aw_763_cast_fp16)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5393_cast_fp16 = softmax(axis = var_5235, x = aw_765_cast_fp16)[name = tensor<string, []>("op_5393_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5394_cast_fp16 = softmax(axis = var_5235, x = aw_767_cast_fp16)[name = tensor<string, []>("op_5394_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5395_cast_fp16 = softmax(axis = var_5235, x = aw_769_cast_fp16)[name = tensor<string, []>("op_5395_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5396_cast_fp16 = softmax(axis = var_5235, x = aw_771_cast_fp16)[name = tensor<string, []>("op_5396_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5397_cast_fp16 = softmax(axis = var_5235, x = aw_773_cast_fp16)[name = tensor<string, []>("op_5397_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5398_cast_fp16 = softmax(axis = var_5235, x = aw_775_cast_fp16)[name = tensor<string, []>("op_5398_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5399_cast_fp16 = softmax(axis = var_5235, x = aw_777_cast_fp16)[name = tensor<string, []>("op_5399_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5400_cast_fp16 = softmax(axis = var_5235, x = aw_779_cast_fp16)[name = tensor<string, []>("op_5400_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5401_cast_fp16 = softmax(axis = var_5235, x = aw_781_cast_fp16)[name = tensor<string, []>("op_5401_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5402_cast_fp16 = softmax(axis = var_5235, x = aw_783_cast_fp16)[name = tensor<string, []>("op_5402_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5403_cast_fp16 = softmax(axis = var_5235, x = aw_785_cast_fp16)[name = tensor<string, []>("op_5403_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5404_cast_fp16 = softmax(axis = var_5235, x = aw_787_cast_fp16)[name = tensor<string, []>("op_5404_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5405_cast_fp16 = softmax(axis = var_5235, x = aw_789_cast_fp16)[name = tensor<string, []>("op_5405_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5406_cast_fp16 = softmax(axis = var_5235, x = aw_791_cast_fp16)[name = tensor<string, []>("op_5406_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5407_cast_fp16 = softmax(axis = var_5235, x = aw_793_cast_fp16)[name = tensor<string, []>("op_5407_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5408_cast_fp16 = softmax(axis = var_5235, x = aw_795_cast_fp16)[name = tensor<string, []>("op_5408_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5409_cast_fp16 = softmax(axis = var_5235, x = aw_797_cast_fp16)[name = tensor<string, []>("op_5409_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5410_cast_fp16 = softmax(axis = var_5235, x = aw_799_cast_fp16)[name = tensor<string, []>("op_5410_cast_fp16")];
+            tensor<string, []> var_5412_equation_0 = const()[name = tensor<string, []>("op_5412_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5412_cast_fp16 = einsum(equation = var_5412_equation_0, values = (var_5330_cast_fp16_0, var_5391_cast_fp16))[name = tensor<string, []>("op_5412_cast_fp16")];
+            tensor<string, []> var_5414_equation_0 = const()[name = tensor<string, []>("op_5414_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5414_cast_fp16 = einsum(equation = var_5414_equation_0, values = (var_5330_cast_fp16_1, var_5392_cast_fp16))[name = tensor<string, []>("op_5414_cast_fp16")];
+            tensor<string, []> var_5416_equation_0 = const()[name = tensor<string, []>("op_5416_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5416_cast_fp16 = einsum(equation = var_5416_equation_0, values = (var_5330_cast_fp16_2, var_5393_cast_fp16))[name = tensor<string, []>("op_5416_cast_fp16")];
+            tensor<string, []> var_5418_equation_0 = const()[name = tensor<string, []>("op_5418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5418_cast_fp16 = einsum(equation = var_5418_equation_0, values = (var_5330_cast_fp16_3, var_5394_cast_fp16))[name = tensor<string, []>("op_5418_cast_fp16")];
+            tensor<string, []> var_5420_equation_0 = const()[name = tensor<string, []>("op_5420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5420_cast_fp16 = einsum(equation = var_5420_equation_0, values = (var_5330_cast_fp16_4, var_5395_cast_fp16))[name = tensor<string, []>("op_5420_cast_fp16")];
+            tensor<string, []> var_5422_equation_0 = const()[name = tensor<string, []>("op_5422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5422_cast_fp16 = einsum(equation = var_5422_equation_0, values = (var_5330_cast_fp16_5, var_5396_cast_fp16))[name = tensor<string, []>("op_5422_cast_fp16")];
+            tensor<string, []> var_5424_equation_0 = const()[name = tensor<string, []>("op_5424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5424_cast_fp16 = einsum(equation = var_5424_equation_0, values = (var_5330_cast_fp16_6, var_5397_cast_fp16))[name = tensor<string, []>("op_5424_cast_fp16")];
+            tensor<string, []> var_5426_equation_0 = const()[name = tensor<string, []>("op_5426_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5426_cast_fp16 = einsum(equation = var_5426_equation_0, values = (var_5330_cast_fp16_7, var_5398_cast_fp16))[name = tensor<string, []>("op_5426_cast_fp16")];
+            tensor<string, []> var_5428_equation_0 = const()[name = tensor<string, []>("op_5428_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5428_cast_fp16 = einsum(equation = var_5428_equation_0, values = (var_5330_cast_fp16_8, var_5399_cast_fp16))[name = tensor<string, []>("op_5428_cast_fp16")];
+            tensor<string, []> var_5430_equation_0 = const()[name = tensor<string, []>("op_5430_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5430_cast_fp16 = einsum(equation = var_5430_equation_0, values = (var_5330_cast_fp16_9, var_5400_cast_fp16))[name = tensor<string, []>("op_5430_cast_fp16")];
+            tensor<string, []> var_5432_equation_0 = const()[name = tensor<string, []>("op_5432_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5432_cast_fp16 = einsum(equation = var_5432_equation_0, values = (var_5330_cast_fp16_10, var_5401_cast_fp16))[name = tensor<string, []>("op_5432_cast_fp16")];
+            tensor<string, []> var_5434_equation_0 = const()[name = tensor<string, []>("op_5434_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5434_cast_fp16 = einsum(equation = var_5434_equation_0, values = (var_5330_cast_fp16_11, var_5402_cast_fp16))[name = tensor<string, []>("op_5434_cast_fp16")];
+            tensor<string, []> var_5436_equation_0 = const()[name = tensor<string, []>("op_5436_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5436_cast_fp16 = einsum(equation = var_5436_equation_0, values = (var_5330_cast_fp16_12, var_5403_cast_fp16))[name = tensor<string, []>("op_5436_cast_fp16")];
+            tensor<string, []> var_5438_equation_0 = const()[name = tensor<string, []>("op_5438_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5438_cast_fp16 = einsum(equation = var_5438_equation_0, values = (var_5330_cast_fp16_13, var_5404_cast_fp16))[name = tensor<string, []>("op_5438_cast_fp16")];
+            tensor<string, []> var_5440_equation_0 = const()[name = tensor<string, []>("op_5440_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5440_cast_fp16 = einsum(equation = var_5440_equation_0, values = (var_5330_cast_fp16_14, var_5405_cast_fp16))[name = tensor<string, []>("op_5440_cast_fp16")];
+            tensor<string, []> var_5442_equation_0 = const()[name = tensor<string, []>("op_5442_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5442_cast_fp16 = einsum(equation = var_5442_equation_0, values = (var_5330_cast_fp16_15, var_5406_cast_fp16))[name = tensor<string, []>("op_5442_cast_fp16")];
+            tensor<string, []> var_5444_equation_0 = const()[name = tensor<string, []>("op_5444_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5444_cast_fp16 = einsum(equation = var_5444_equation_0, values = (var_5330_cast_fp16_16, var_5407_cast_fp16))[name = tensor<string, []>("op_5444_cast_fp16")];
+            tensor<string, []> var_5446_equation_0 = const()[name = tensor<string, []>("op_5446_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5446_cast_fp16 = einsum(equation = var_5446_equation_0, values = (var_5330_cast_fp16_17, var_5408_cast_fp16))[name = tensor<string, []>("op_5446_cast_fp16")];
+            tensor<string, []> var_5448_equation_0 = const()[name = tensor<string, []>("op_5448_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5448_cast_fp16 = einsum(equation = var_5448_equation_0, values = (var_5330_cast_fp16_18, var_5409_cast_fp16))[name = tensor<string, []>("op_5448_cast_fp16")];
+            tensor<string, []> var_5450_equation_0 = const()[name = tensor<string, []>("op_5450_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5450_cast_fp16 = einsum(equation = var_5450_equation_0, values = (var_5330_cast_fp16_19, var_5410_cast_fp16))[name = tensor<string, []>("op_5450_cast_fp16")];
+            tensor<bool, []> input_195_interleave_0 = const()[name = tensor<string, []>("input_195_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_195_cast_fp16 = concat(axis = var_5235, interleave = input_195_interleave_0, values = (var_5412_cast_fp16, var_5414_cast_fp16, var_5416_cast_fp16, var_5418_cast_fp16, var_5420_cast_fp16, var_5422_cast_fp16, var_5424_cast_fp16, var_5426_cast_fp16, var_5428_cast_fp16, var_5430_cast_fp16, var_5432_cast_fp16, var_5434_cast_fp16, var_5436_cast_fp16, var_5438_cast_fp16, var_5440_cast_fp16, var_5442_cast_fp16, var_5444_cast_fp16, var_5446_cast_fp16, var_5448_cast_fp16, var_5450_cast_fp16))[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<string, []> var_5459_pad_type_0 = const()[name = tensor<string, []>("op_5459_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5459_strides_0 = const()[name = tensor<string, []>("op_5459_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5459_pad_0 = const()[name = tensor<string, []>("op_5459_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5459_dilations_0 = const()[name = tensor<string, []>("op_5459_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5459_groups_0 = const()[name = tensor<string, []>("op_5459_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_19_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(772212352)))];
+            tensor<fp16, [1280]> blocks_19_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775489216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5459_cast_fp16 = conv(bias = blocks_19_attn_out_bias_to_fp16, dilations = var_5459_dilations_0, groups = var_5459_groups_0, pad = var_5459_pad_0, pad_type = var_5459_pad_type_0, strides = var_5459_strides_0, weight = blocks_19_attn_out_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("op_5459_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = var_5459_cast_fp16)[name = tensor<string, []>("inputs_79_cast_fp16")];
+            tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_197_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_197_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775491840)))];
+            tensor<fp16, [1280]> input_197_beta_0_to_fp16 = const()[name = tensor<string, []>("input_197_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775494464)))];
+            tensor<fp16, []> var_5469_to_fp16 = const()[name = tensor<string, []>("op_5469_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = input_197_beta_0_to_fp16, epsilon = var_5469_to_fp16, gamma = input_197_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_19_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(775497088)))];
+            tensor<fp16, [5120]> blocks_19_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788604352)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_199_cast_fp16 = conv(bias = blocks_19_mlp_0_bias_to_fp16, dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = blocks_19_mlp_0_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<string, []> var_5495_pad_type_0 = const()[name = tensor<string, []>("op_5495_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5495_strides_0 = const()[name = tensor<string, []>("op_5495_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5495_pad_0 = const()[name = tensor<string, []>("op_5495_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5495_dilations_0 = const()[name = tensor<string, []>("op_5495_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5495_groups_0 = const()[name = tensor<string, []>("op_5495_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_19_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(788614656)))];
+            tensor<fp16, [1280]> blocks_19_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801721920)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5495_cast_fp16 = conv(bias = blocks_19_mlp_2_bias_to_fp16, dilations = var_5495_dilations_0, groups = var_5495_groups_0, pad = var_5495_pad_0, pad_type = var_5495_pad_type_0, strides = var_5495_strides_0, weight = blocks_19_mlp_2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("op_5495_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_5495_cast_fp16)[name = tensor<string, []>("inputs_81_cast_fp16")];
+            tensor<int32, []> var_5504 = const()[name = tensor<string, []>("op_5504"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_203_axes_0 = const()[name = tensor<string, []>("input_203_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_203_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801724544)))];
+            tensor<fp16, [1280]> input_203_beta_0_to_fp16 = const()[name = tensor<string, []>("input_203_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801727168)))];
+            tensor<fp16, []> var_5520_to_fp16 = const()[name = tensor<string, []>("op_5520_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_203_cast_fp16 = layer_norm(axes = input_203_axes_0, beta = input_203_beta_0_to_fp16, epsilon = var_5520_to_fp16, gamma = input_203_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<string, []> q_41_pad_type_0 = const()[name = tensor<string, []>("q_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_41_strides_0 = const()[name = tensor<string, []>("q_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_41_pad_0 = const()[name = tensor<string, []>("q_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_41_dilations_0 = const()[name = tensor<string, []>("q_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_41_groups_0 = const()[name = tensor<string, []>("q_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5555_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5555_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(801729792)))];
+            tensor<fp16, [1280]> var_5555_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5555_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(805006656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5555_cast_fp16 = conv(bias = var_5555_bias_0_to_fp16, dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = var_5555_weight_0_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5555_cast_fp16")];
+            tensor<string, []> k_41_pad_type_0 = const()[name = tensor<string, []>("k_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_41_strides_0 = const()[name = tensor<string, []>("k_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_41_pad_0 = const()[name = tensor<string, []>("k_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_41_dilations_0 = const()[name = tensor<string, []>("k_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_41_groups_0 = const()[name = tensor<string, []>("k_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(805009280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_41_cast_fp16 = conv(dilations = k_41_dilations_0, groups = k_41_groups_0, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = k_41_strides_0, weight = blocks_20_attn_key_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
+            tensor<string, []> var_5553_pad_type_0 = const()[name = tensor<string, []>("op_5553_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5553_strides_0 = const()[name = tensor<string, []>("op_5553_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5553_pad_0 = const()[name = tensor<string, []>("op_5553_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5553_dilations_0 = const()[name = tensor<string, []>("op_5553_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5553_groups_0 = const()[name = tensor<string, []>("op_5553_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(808286144)))];
+            tensor<fp16, [1280]> blocks_20_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811563008)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5553_cast_fp16 = conv(bias = blocks_20_attn_value_bias_to_fp16, dilations = var_5553_dilations_0, groups = var_5553_groups_0, pad = var_5553_pad_0, pad_type = var_5553_pad_type_0, strides = var_5553_strides_0, weight = blocks_20_attn_value_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_5553_cast_fp16")];
+            tensor<int32, [20]> tile_60 = const()[name = tensor<string, []>("tile_60"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5556_axis_0 = const()[name = tensor<string, []>("op_5556_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5556_cast_fp16_19 = split(axis = var_5556_axis_0, split_sizes = tile_60, x = var_5555_cast_fp16)[name = tensor<string, []>("op_5556_cast_fp16")];
+            tensor<int32, [4]> var_5577_perm_0 = const()[name = tensor<string, []>("op_5577_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_61 = const()[name = tensor<string, []>("tile_61"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5578_axis_0 = const()[name = tensor<string, []>("op_5578_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5577_cast_fp16 = transpose(perm = var_5577_perm_0, x = k_41_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5578_cast_fp16_19 = split(axis = var_5578_axis_0, split_sizes = tile_61, x = var_5577_cast_fp16)[name = tensor<string, []>("op_5578_cast_fp16")];
+            tensor<int32, [20]> tile_62 = const()[name = tensor<string, []>("tile_62"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5599_axis_0 = const()[name = tensor<string, []>("op_5599_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5599_cast_fp16_19 = split(axis = var_5599_axis_0, split_sizes = tile_62, x = var_5553_cast_fp16)[name = tensor<string, []>("op_5599_cast_fp16")];
+            tensor<string, []> aw_801_equation_0 = const()[name = tensor<string, []>("aw_801_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_801_cast_fp16 = einsum(equation = aw_801_equation_0, values = (var_5578_cast_fp16_0, var_5556_cast_fp16_0))[name = tensor<string, []>("aw_801_cast_fp16")];
+            tensor<string, []> aw_803_equation_0 = const()[name = tensor<string, []>("aw_803_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_803_cast_fp16 = einsum(equation = aw_803_equation_0, values = (var_5578_cast_fp16_1, var_5556_cast_fp16_1))[name = tensor<string, []>("aw_803_cast_fp16")];
+            tensor<string, []> aw_805_equation_0 = const()[name = tensor<string, []>("aw_805_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_805_cast_fp16 = einsum(equation = aw_805_equation_0, values = (var_5578_cast_fp16_2, var_5556_cast_fp16_2))[name = tensor<string, []>("aw_805_cast_fp16")];
+            tensor<string, []> aw_807_equation_0 = const()[name = tensor<string, []>("aw_807_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_807_cast_fp16 = einsum(equation = aw_807_equation_0, values = (var_5578_cast_fp16_3, var_5556_cast_fp16_3))[name = tensor<string, []>("aw_807_cast_fp16")];
+            tensor<string, []> aw_809_equation_0 = const()[name = tensor<string, []>("aw_809_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_809_cast_fp16 = einsum(equation = aw_809_equation_0, values = (var_5578_cast_fp16_4, var_5556_cast_fp16_4))[name = tensor<string, []>("aw_809_cast_fp16")];
+            tensor<string, []> aw_811_equation_0 = const()[name = tensor<string, []>("aw_811_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_811_cast_fp16 = einsum(equation = aw_811_equation_0, values = (var_5578_cast_fp16_5, var_5556_cast_fp16_5))[name = tensor<string, []>("aw_811_cast_fp16")];
+            tensor<string, []> aw_813_equation_0 = const()[name = tensor<string, []>("aw_813_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_813_cast_fp16 = einsum(equation = aw_813_equation_0, values = (var_5578_cast_fp16_6, var_5556_cast_fp16_6))[name = tensor<string, []>("aw_813_cast_fp16")];
+            tensor<string, []> aw_815_equation_0 = const()[name = tensor<string, []>("aw_815_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_815_cast_fp16 = einsum(equation = aw_815_equation_0, values = (var_5578_cast_fp16_7, var_5556_cast_fp16_7))[name = tensor<string, []>("aw_815_cast_fp16")];
+            tensor<string, []> aw_817_equation_0 = const()[name = tensor<string, []>("aw_817_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_817_cast_fp16 = einsum(equation = aw_817_equation_0, values = (var_5578_cast_fp16_8, var_5556_cast_fp16_8))[name = tensor<string, []>("aw_817_cast_fp16")];
+            tensor<string, []> aw_819_equation_0 = const()[name = tensor<string, []>("aw_819_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_819_cast_fp16 = einsum(equation = aw_819_equation_0, values = (var_5578_cast_fp16_9, var_5556_cast_fp16_9))[name = tensor<string, []>("aw_819_cast_fp16")];
+            tensor<string, []> aw_821_equation_0 = const()[name = tensor<string, []>("aw_821_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_821_cast_fp16 = einsum(equation = aw_821_equation_0, values = (var_5578_cast_fp16_10, var_5556_cast_fp16_10))[name = tensor<string, []>("aw_821_cast_fp16")];
+            tensor<string, []> aw_823_equation_0 = const()[name = tensor<string, []>("aw_823_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_823_cast_fp16 = einsum(equation = aw_823_equation_0, values = (var_5578_cast_fp16_11, var_5556_cast_fp16_11))[name = tensor<string, []>("aw_823_cast_fp16")];
+            tensor<string, []> aw_825_equation_0 = const()[name = tensor<string, []>("aw_825_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_825_cast_fp16 = einsum(equation = aw_825_equation_0, values = (var_5578_cast_fp16_12, var_5556_cast_fp16_12))[name = tensor<string, []>("aw_825_cast_fp16")];
+            tensor<string, []> aw_827_equation_0 = const()[name = tensor<string, []>("aw_827_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_827_cast_fp16 = einsum(equation = aw_827_equation_0, values = (var_5578_cast_fp16_13, var_5556_cast_fp16_13))[name = tensor<string, []>("aw_827_cast_fp16")];
+            tensor<string, []> aw_829_equation_0 = const()[name = tensor<string, []>("aw_829_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_829_cast_fp16 = einsum(equation = aw_829_equation_0, values = (var_5578_cast_fp16_14, var_5556_cast_fp16_14))[name = tensor<string, []>("aw_829_cast_fp16")];
+            tensor<string, []> aw_831_equation_0 = const()[name = tensor<string, []>("aw_831_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_831_cast_fp16 = einsum(equation = aw_831_equation_0, values = (var_5578_cast_fp16_15, var_5556_cast_fp16_15))[name = tensor<string, []>("aw_831_cast_fp16")];
+            tensor<string, []> aw_833_equation_0 = const()[name = tensor<string, []>("aw_833_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_833_cast_fp16 = einsum(equation = aw_833_equation_0, values = (var_5578_cast_fp16_16, var_5556_cast_fp16_16))[name = tensor<string, []>("aw_833_cast_fp16")];
+            tensor<string, []> aw_835_equation_0 = const()[name = tensor<string, []>("aw_835_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_835_cast_fp16 = einsum(equation = aw_835_equation_0, values = (var_5578_cast_fp16_17, var_5556_cast_fp16_17))[name = tensor<string, []>("aw_835_cast_fp16")];
+            tensor<string, []> aw_837_equation_0 = const()[name = tensor<string, []>("aw_837_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_837_cast_fp16 = einsum(equation = aw_837_equation_0, values = (var_5578_cast_fp16_18, var_5556_cast_fp16_18))[name = tensor<string, []>("aw_837_cast_fp16")];
+            tensor<string, []> aw_839_equation_0 = const()[name = tensor<string, []>("aw_839_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_839_cast_fp16 = einsum(equation = aw_839_equation_0, values = (var_5578_cast_fp16_19, var_5556_cast_fp16_19))[name = tensor<string, []>("aw_839_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5660_cast_fp16 = softmax(axis = var_5504, x = aw_801_cast_fp16)[name = tensor<string, []>("op_5660_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5661_cast_fp16 = softmax(axis = var_5504, x = aw_803_cast_fp16)[name = tensor<string, []>("op_5661_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5662_cast_fp16 = softmax(axis = var_5504, x = aw_805_cast_fp16)[name = tensor<string, []>("op_5662_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5663_cast_fp16 = softmax(axis = var_5504, x = aw_807_cast_fp16)[name = tensor<string, []>("op_5663_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5664_cast_fp16 = softmax(axis = var_5504, x = aw_809_cast_fp16)[name = tensor<string, []>("op_5664_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5665_cast_fp16 = softmax(axis = var_5504, x = aw_811_cast_fp16)[name = tensor<string, []>("op_5665_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5666_cast_fp16 = softmax(axis = var_5504, x = aw_813_cast_fp16)[name = tensor<string, []>("op_5666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5667_cast_fp16 = softmax(axis = var_5504, x = aw_815_cast_fp16)[name = tensor<string, []>("op_5667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5668_cast_fp16 = softmax(axis = var_5504, x = aw_817_cast_fp16)[name = tensor<string, []>("op_5668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5669_cast_fp16 = softmax(axis = var_5504, x = aw_819_cast_fp16)[name = tensor<string, []>("op_5669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5670_cast_fp16 = softmax(axis = var_5504, x = aw_821_cast_fp16)[name = tensor<string, []>("op_5670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5671_cast_fp16 = softmax(axis = var_5504, x = aw_823_cast_fp16)[name = tensor<string, []>("op_5671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5672_cast_fp16 = softmax(axis = var_5504, x = aw_825_cast_fp16)[name = tensor<string, []>("op_5672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5673_cast_fp16 = softmax(axis = var_5504, x = aw_827_cast_fp16)[name = tensor<string, []>("op_5673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5674_cast_fp16 = softmax(axis = var_5504, x = aw_829_cast_fp16)[name = tensor<string, []>("op_5674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5675_cast_fp16 = softmax(axis = var_5504, x = aw_831_cast_fp16)[name = tensor<string, []>("op_5675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5676_cast_fp16 = softmax(axis = var_5504, x = aw_833_cast_fp16)[name = tensor<string, []>("op_5676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5677_cast_fp16 = softmax(axis = var_5504, x = aw_835_cast_fp16)[name = tensor<string, []>("op_5677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5678_cast_fp16 = softmax(axis = var_5504, x = aw_837_cast_fp16)[name = tensor<string, []>("op_5678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5679_cast_fp16 = softmax(axis = var_5504, x = aw_839_cast_fp16)[name = tensor<string, []>("op_5679_cast_fp16")];
+            tensor<string, []> var_5681_equation_0 = const()[name = tensor<string, []>("op_5681_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5681_cast_fp16 = einsum(equation = var_5681_equation_0, values = (var_5599_cast_fp16_0, var_5660_cast_fp16))[name = tensor<string, []>("op_5681_cast_fp16")];
+            tensor<string, []> var_5683_equation_0 = const()[name = tensor<string, []>("op_5683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5683_cast_fp16 = einsum(equation = var_5683_equation_0, values = (var_5599_cast_fp16_1, var_5661_cast_fp16))[name = tensor<string, []>("op_5683_cast_fp16")];
+            tensor<string, []> var_5685_equation_0 = const()[name = tensor<string, []>("op_5685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5685_cast_fp16 = einsum(equation = var_5685_equation_0, values = (var_5599_cast_fp16_2, var_5662_cast_fp16))[name = tensor<string, []>("op_5685_cast_fp16")];
+            tensor<string, []> var_5687_equation_0 = const()[name = tensor<string, []>("op_5687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5687_cast_fp16 = einsum(equation = var_5687_equation_0, values = (var_5599_cast_fp16_3, var_5663_cast_fp16))[name = tensor<string, []>("op_5687_cast_fp16")];
+            tensor<string, []> var_5689_equation_0 = const()[name = tensor<string, []>("op_5689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5689_cast_fp16 = einsum(equation = var_5689_equation_0, values = (var_5599_cast_fp16_4, var_5664_cast_fp16))[name = tensor<string, []>("op_5689_cast_fp16")];
+            tensor<string, []> var_5691_equation_0 = const()[name = tensor<string, []>("op_5691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5691_cast_fp16 = einsum(equation = var_5691_equation_0, values = (var_5599_cast_fp16_5, var_5665_cast_fp16))[name = tensor<string, []>("op_5691_cast_fp16")];
+            tensor<string, []> var_5693_equation_0 = const()[name = tensor<string, []>("op_5693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5693_cast_fp16 = einsum(equation = var_5693_equation_0, values = (var_5599_cast_fp16_6, var_5666_cast_fp16))[name = tensor<string, []>("op_5693_cast_fp16")];
+            tensor<string, []> var_5695_equation_0 = const()[name = tensor<string, []>("op_5695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5695_cast_fp16 = einsum(equation = var_5695_equation_0, values = (var_5599_cast_fp16_7, var_5667_cast_fp16))[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<string, []> var_5697_equation_0 = const()[name = tensor<string, []>("op_5697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5697_cast_fp16 = einsum(equation = var_5697_equation_0, values = (var_5599_cast_fp16_8, var_5668_cast_fp16))[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<string, []> var_5699_equation_0 = const()[name = tensor<string, []>("op_5699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5699_cast_fp16 = einsum(equation = var_5699_equation_0, values = (var_5599_cast_fp16_9, var_5669_cast_fp16))[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<string, []> var_5701_equation_0 = const()[name = tensor<string, []>("op_5701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5701_cast_fp16 = einsum(equation = var_5701_equation_0, values = (var_5599_cast_fp16_10, var_5670_cast_fp16))[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<string, []> var_5703_equation_0 = const()[name = tensor<string, []>("op_5703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5703_cast_fp16 = einsum(equation = var_5703_equation_0, values = (var_5599_cast_fp16_11, var_5671_cast_fp16))[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<string, []> var_5705_equation_0 = const()[name = tensor<string, []>("op_5705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5705_cast_fp16 = einsum(equation = var_5705_equation_0, values = (var_5599_cast_fp16_12, var_5672_cast_fp16))[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<string, []> var_5707_equation_0 = const()[name = tensor<string, []>("op_5707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5707_cast_fp16 = einsum(equation = var_5707_equation_0, values = (var_5599_cast_fp16_13, var_5673_cast_fp16))[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<string, []> var_5709_equation_0 = const()[name = tensor<string, []>("op_5709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5709_cast_fp16 = einsum(equation = var_5709_equation_0, values = (var_5599_cast_fp16_14, var_5674_cast_fp16))[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<string, []> var_5711_equation_0 = const()[name = tensor<string, []>("op_5711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5711_cast_fp16 = einsum(equation = var_5711_equation_0, values = (var_5599_cast_fp16_15, var_5675_cast_fp16))[name = tensor<string, []>("op_5711_cast_fp16")];
+            tensor<string, []> var_5713_equation_0 = const()[name = tensor<string, []>("op_5713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5713_cast_fp16 = einsum(equation = var_5713_equation_0, values = (var_5599_cast_fp16_16, var_5676_cast_fp16))[name = tensor<string, []>("op_5713_cast_fp16")];
+            tensor<string, []> var_5715_equation_0 = const()[name = tensor<string, []>("op_5715_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5715_cast_fp16 = einsum(equation = var_5715_equation_0, values = (var_5599_cast_fp16_17, var_5677_cast_fp16))[name = tensor<string, []>("op_5715_cast_fp16")];
+            tensor<string, []> var_5717_equation_0 = const()[name = tensor<string, []>("op_5717_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5717_cast_fp16 = einsum(equation = var_5717_equation_0, values = (var_5599_cast_fp16_18, var_5678_cast_fp16))[name = tensor<string, []>("op_5717_cast_fp16")];
+            tensor<string, []> var_5719_equation_0 = const()[name = tensor<string, []>("op_5719_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5719_cast_fp16 = einsum(equation = var_5719_equation_0, values = (var_5599_cast_fp16_19, var_5679_cast_fp16))[name = tensor<string, []>("op_5719_cast_fp16")];
+            tensor<bool, []> input_205_interleave_0 = const()[name = tensor<string, []>("input_205_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_205_cast_fp16 = concat(axis = var_5504, interleave = input_205_interleave_0, values = (var_5681_cast_fp16, var_5683_cast_fp16, var_5685_cast_fp16, var_5687_cast_fp16, var_5689_cast_fp16, var_5691_cast_fp16, var_5693_cast_fp16, var_5695_cast_fp16, var_5697_cast_fp16, var_5699_cast_fp16, var_5701_cast_fp16, var_5703_cast_fp16, var_5705_cast_fp16, var_5707_cast_fp16, var_5709_cast_fp16, var_5711_cast_fp16, var_5713_cast_fp16, var_5715_cast_fp16, var_5717_cast_fp16, var_5719_cast_fp16))[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<string, []> var_5728_pad_type_0 = const()[name = tensor<string, []>("op_5728_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5728_strides_0 = const()[name = tensor<string, []>("op_5728_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5728_pad_0 = const()[name = tensor<string, []>("op_5728_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5728_dilations_0 = const()[name = tensor<string, []>("op_5728_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5728_groups_0 = const()[name = tensor<string, []>("op_5728_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_20_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(811565632)))];
+            tensor<fp16, [1280]> blocks_20_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814842496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5728_cast_fp16 = conv(bias = blocks_20_attn_out_bias_to_fp16, dilations = var_5728_dilations_0, groups = var_5728_groups_0, pad = var_5728_pad_0, pad_type = var_5728_pad_type_0, strides = var_5728_strides_0, weight = blocks_20_attn_out_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = var_5728_cast_fp16)[name = tensor<string, []>("inputs_83_cast_fp16")];
+            tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_207_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_207_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814845120)))];
+            tensor<fp16, [1280]> input_207_beta_0_to_fp16 = const()[name = tensor<string, []>("input_207_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814847744)))];
+            tensor<fp16, []> var_5738_to_fp16 = const()[name = tensor<string, []>("op_5738_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_207_cast_fp16 = layer_norm(axes = input_207_axes_0, beta = input_207_beta_0_to_fp16, epsilon = var_5738_to_fp16, gamma = input_207_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<string, []> input_209_pad_type_0 = const()[name = tensor<string, []>("input_209_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_209_strides_0 = const()[name = tensor<string, []>("input_209_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_209_dilations_0 = const()[name = tensor<string, []>("input_209_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_209_groups_0 = const()[name = tensor<string, []>("input_209_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_20_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(814850368)))];
+            tensor<fp16, [5120]> blocks_20_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827957632)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_209_cast_fp16 = conv(bias = blocks_20_mlp_0_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = blocks_20_mlp_0_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<string, []> input_211_mode_0 = const()[name = tensor<string, []>("input_211_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_211_cast_fp16 = gelu(mode = input_211_mode_0, x = input_209_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<string, []> var_5764_pad_type_0 = const()[name = tensor<string, []>("op_5764_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5764_strides_0 = const()[name = tensor<string, []>("op_5764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5764_pad_0 = const()[name = tensor<string, []>("op_5764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5764_dilations_0 = const()[name = tensor<string, []>("op_5764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5764_groups_0 = const()[name = tensor<string, []>("op_5764_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_20_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(827967936)))];
+            tensor<fp16, [1280]> blocks_20_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(841075200)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5764_cast_fp16 = conv(bias = blocks_20_mlp_2_bias_to_fp16, dilations = var_5764_dilations_0, groups = var_5764_groups_0, pad = var_5764_pad_0, pad_type = var_5764_pad_type_0, strides = var_5764_strides_0, weight = blocks_20_mlp_2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor<string, []>("op_5764_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = var_5764_cast_fp16)[name = tensor<string, []>("inputs_85_cast_fp16")];
+            tensor<int32, []> var_5773 = const()[name = tensor<string, []>("op_5773"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_213_axes_0 = const()[name = tensor<string, []>("input_213_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_213_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_213_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(841077824)))];
+            tensor<fp16, [1280]> input_213_beta_0_to_fp16 = const()[name = tensor<string, []>("input_213_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(841080448)))];
+            tensor<fp16, []> var_5789_to_fp16 = const()[name = tensor<string, []>("op_5789_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_213_cast_fp16 = layer_norm(axes = input_213_axes_0, beta = input_213_beta_0_to_fp16, epsilon = var_5789_to_fp16, gamma = input_213_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<string, []> q_43_pad_type_0 = const()[name = tensor<string, []>("q_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_43_strides_0 = const()[name = tensor<string, []>("q_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_43_pad_0 = const()[name = tensor<string, []>("q_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_43_dilations_0 = const()[name = tensor<string, []>("q_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_43_groups_0 = const()[name = tensor<string, []>("q_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_5824_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5824_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(841083072)))];
+            tensor<fp16, [1280]> var_5824_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5824_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(844359936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5824_cast_fp16 = conv(bias = var_5824_bias_0_to_fp16, dilations = q_43_dilations_0, groups = q_43_groups_0, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = q_43_strides_0, weight = var_5824_weight_0_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5824_cast_fp16")];
+            tensor<string, []> k_43_pad_type_0 = const()[name = tensor<string, []>("k_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_43_strides_0 = const()[name = tensor<string, []>("k_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_43_pad_0 = const()[name = tensor<string, []>("k_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_43_dilations_0 = const()[name = tensor<string, []>("k_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_43_groups_0 = const()[name = tensor<string, []>("k_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(844362560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_43_cast_fp16 = conv(dilations = k_43_dilations_0, groups = k_43_groups_0, pad = k_43_pad_0, pad_type = k_43_pad_type_0, strides = k_43_strides_0, weight = blocks_21_attn_key_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("k_43_cast_fp16")];
+            tensor<string, []> var_5822_pad_type_0 = const()[name = tensor<string, []>("op_5822_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5822_strides_0 = const()[name = tensor<string, []>("op_5822_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5822_pad_0 = const()[name = tensor<string, []>("op_5822_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5822_dilations_0 = const()[name = tensor<string, []>("op_5822_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5822_groups_0 = const()[name = tensor<string, []>("op_5822_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(847639424)))];
+            tensor<fp16, [1280]> blocks_21_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850916288)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5822_cast_fp16 = conv(bias = blocks_21_attn_value_bias_to_fp16, dilations = var_5822_dilations_0, groups = var_5822_groups_0, pad = var_5822_pad_0, pad_type = var_5822_pad_type_0, strides = var_5822_strides_0, weight = blocks_21_attn_value_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5822_cast_fp16")];
+            tensor<int32, [20]> tile_63 = const()[name = tensor<string, []>("tile_63"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5825_axis_0 = const()[name = tensor<string, []>("op_5825_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5825_cast_fp16_19 = split(axis = var_5825_axis_0, split_sizes = tile_63, x = var_5824_cast_fp16)[name = tensor<string, []>("op_5825_cast_fp16")];
+            tensor<int32, [4]> var_5846_perm_0 = const()[name = tensor<string, []>("op_5846_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_64 = const()[name = tensor<string, []>("tile_64"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5847_axis_0 = const()[name = tensor<string, []>("op_5847_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_5846_cast_fp16 = transpose(perm = var_5846_perm_0, x = k_43_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_5847_cast_fp16_19 = split(axis = var_5847_axis_0, split_sizes = tile_64, x = var_5846_cast_fp16)[name = tensor<string, []>("op_5847_cast_fp16")];
+            tensor<int32, [20]> tile_65 = const()[name = tensor<string, []>("tile_65"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5868_axis_0 = const()[name = tensor<string, []>("op_5868_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_5868_cast_fp16_19 = split(axis = var_5868_axis_0, split_sizes = tile_65, x = var_5822_cast_fp16)[name = tensor<string, []>("op_5868_cast_fp16")];
+            tensor<string, []> aw_841_equation_0 = const()[name = tensor<string, []>("aw_841_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_841_cast_fp16 = einsum(equation = aw_841_equation_0, values = (var_5847_cast_fp16_0, var_5825_cast_fp16_0))[name = tensor<string, []>("aw_841_cast_fp16")];
+            tensor<string, []> aw_843_equation_0 = const()[name = tensor<string, []>("aw_843_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_843_cast_fp16 = einsum(equation = aw_843_equation_0, values = (var_5847_cast_fp16_1, var_5825_cast_fp16_1))[name = tensor<string, []>("aw_843_cast_fp16")];
+            tensor<string, []> aw_845_equation_0 = const()[name = tensor<string, []>("aw_845_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_845_cast_fp16 = einsum(equation = aw_845_equation_0, values = (var_5847_cast_fp16_2, var_5825_cast_fp16_2))[name = tensor<string, []>("aw_845_cast_fp16")];
+            tensor<string, []> aw_847_equation_0 = const()[name = tensor<string, []>("aw_847_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_847_cast_fp16 = einsum(equation = aw_847_equation_0, values = (var_5847_cast_fp16_3, var_5825_cast_fp16_3))[name = tensor<string, []>("aw_847_cast_fp16")];
+            tensor<string, []> aw_849_equation_0 = const()[name = tensor<string, []>("aw_849_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_849_cast_fp16 = einsum(equation = aw_849_equation_0, values = (var_5847_cast_fp16_4, var_5825_cast_fp16_4))[name = tensor<string, []>("aw_849_cast_fp16")];
+            tensor<string, []> aw_851_equation_0 = const()[name = tensor<string, []>("aw_851_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_851_cast_fp16 = einsum(equation = aw_851_equation_0, values = (var_5847_cast_fp16_5, var_5825_cast_fp16_5))[name = tensor<string, []>("aw_851_cast_fp16")];
+            tensor<string, []> aw_853_equation_0 = const()[name = tensor<string, []>("aw_853_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_853_cast_fp16 = einsum(equation = aw_853_equation_0, values = (var_5847_cast_fp16_6, var_5825_cast_fp16_6))[name = tensor<string, []>("aw_853_cast_fp16")];
+            tensor<string, []> aw_855_equation_0 = const()[name = tensor<string, []>("aw_855_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_855_cast_fp16 = einsum(equation = aw_855_equation_0, values = (var_5847_cast_fp16_7, var_5825_cast_fp16_7))[name = tensor<string, []>("aw_855_cast_fp16")];
+            tensor<string, []> aw_857_equation_0 = const()[name = tensor<string, []>("aw_857_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_857_cast_fp16 = einsum(equation = aw_857_equation_0, values = (var_5847_cast_fp16_8, var_5825_cast_fp16_8))[name = tensor<string, []>("aw_857_cast_fp16")];
+            tensor<string, []> aw_859_equation_0 = const()[name = tensor<string, []>("aw_859_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_859_cast_fp16 = einsum(equation = aw_859_equation_0, values = (var_5847_cast_fp16_9, var_5825_cast_fp16_9))[name = tensor<string, []>("aw_859_cast_fp16")];
+            tensor<string, []> aw_861_equation_0 = const()[name = tensor<string, []>("aw_861_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_861_cast_fp16 = einsum(equation = aw_861_equation_0, values = (var_5847_cast_fp16_10, var_5825_cast_fp16_10))[name = tensor<string, []>("aw_861_cast_fp16")];
+            tensor<string, []> aw_863_equation_0 = const()[name = tensor<string, []>("aw_863_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_863_cast_fp16 = einsum(equation = aw_863_equation_0, values = (var_5847_cast_fp16_11, var_5825_cast_fp16_11))[name = tensor<string, []>("aw_863_cast_fp16")];
+            tensor<string, []> aw_865_equation_0 = const()[name = tensor<string, []>("aw_865_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_865_cast_fp16 = einsum(equation = aw_865_equation_0, values = (var_5847_cast_fp16_12, var_5825_cast_fp16_12))[name = tensor<string, []>("aw_865_cast_fp16")];
+            tensor<string, []> aw_867_equation_0 = const()[name = tensor<string, []>("aw_867_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_867_cast_fp16 = einsum(equation = aw_867_equation_0, values = (var_5847_cast_fp16_13, var_5825_cast_fp16_13))[name = tensor<string, []>("aw_867_cast_fp16")];
+            tensor<string, []> aw_869_equation_0 = const()[name = tensor<string, []>("aw_869_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_869_cast_fp16 = einsum(equation = aw_869_equation_0, values = (var_5847_cast_fp16_14, var_5825_cast_fp16_14))[name = tensor<string, []>("aw_869_cast_fp16")];
+            tensor<string, []> aw_871_equation_0 = const()[name = tensor<string, []>("aw_871_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_871_cast_fp16 = einsum(equation = aw_871_equation_0, values = (var_5847_cast_fp16_15, var_5825_cast_fp16_15))[name = tensor<string, []>("aw_871_cast_fp16")];
+            tensor<string, []> aw_873_equation_0 = const()[name = tensor<string, []>("aw_873_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_873_cast_fp16 = einsum(equation = aw_873_equation_0, values = (var_5847_cast_fp16_16, var_5825_cast_fp16_16))[name = tensor<string, []>("aw_873_cast_fp16")];
+            tensor<string, []> aw_875_equation_0 = const()[name = tensor<string, []>("aw_875_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_875_cast_fp16 = einsum(equation = aw_875_equation_0, values = (var_5847_cast_fp16_17, var_5825_cast_fp16_17))[name = tensor<string, []>("aw_875_cast_fp16")];
+            tensor<string, []> aw_877_equation_0 = const()[name = tensor<string, []>("aw_877_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_877_cast_fp16 = einsum(equation = aw_877_equation_0, values = (var_5847_cast_fp16_18, var_5825_cast_fp16_18))[name = tensor<string, []>("aw_877_cast_fp16")];
+            tensor<string, []> aw_879_equation_0 = const()[name = tensor<string, []>("aw_879_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_879_cast_fp16 = einsum(equation = aw_879_equation_0, values = (var_5847_cast_fp16_19, var_5825_cast_fp16_19))[name = tensor<string, []>("aw_879_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5929_cast_fp16 = softmax(axis = var_5773, x = aw_841_cast_fp16)[name = tensor<string, []>("op_5929_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5930_cast_fp16 = softmax(axis = var_5773, x = aw_843_cast_fp16)[name = tensor<string, []>("op_5930_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5931_cast_fp16 = softmax(axis = var_5773, x = aw_845_cast_fp16)[name = tensor<string, []>("op_5931_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5932_cast_fp16 = softmax(axis = var_5773, x = aw_847_cast_fp16)[name = tensor<string, []>("op_5932_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5933_cast_fp16 = softmax(axis = var_5773, x = aw_849_cast_fp16)[name = tensor<string, []>("op_5933_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5934_cast_fp16 = softmax(axis = var_5773, x = aw_851_cast_fp16)[name = tensor<string, []>("op_5934_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5935_cast_fp16 = softmax(axis = var_5773, x = aw_853_cast_fp16)[name = tensor<string, []>("op_5935_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5936_cast_fp16 = softmax(axis = var_5773, x = aw_855_cast_fp16)[name = tensor<string, []>("op_5936_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5937_cast_fp16 = softmax(axis = var_5773, x = aw_857_cast_fp16)[name = tensor<string, []>("op_5937_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5938_cast_fp16 = softmax(axis = var_5773, x = aw_859_cast_fp16)[name = tensor<string, []>("op_5938_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5939_cast_fp16 = softmax(axis = var_5773, x = aw_861_cast_fp16)[name = tensor<string, []>("op_5939_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5940_cast_fp16 = softmax(axis = var_5773, x = aw_863_cast_fp16)[name = tensor<string, []>("op_5940_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5941_cast_fp16 = softmax(axis = var_5773, x = aw_865_cast_fp16)[name = tensor<string, []>("op_5941_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5942_cast_fp16 = softmax(axis = var_5773, x = aw_867_cast_fp16)[name = tensor<string, []>("op_5942_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5943_cast_fp16 = softmax(axis = var_5773, x = aw_869_cast_fp16)[name = tensor<string, []>("op_5943_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5944_cast_fp16 = softmax(axis = var_5773, x = aw_871_cast_fp16)[name = tensor<string, []>("op_5944_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5945_cast_fp16 = softmax(axis = var_5773, x = aw_873_cast_fp16)[name = tensor<string, []>("op_5945_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5946_cast_fp16 = softmax(axis = var_5773, x = aw_875_cast_fp16)[name = tensor<string, []>("op_5946_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5947_cast_fp16 = softmax(axis = var_5773, x = aw_877_cast_fp16)[name = tensor<string, []>("op_5947_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5948_cast_fp16 = softmax(axis = var_5773, x = aw_879_cast_fp16)[name = tensor<string, []>("op_5948_cast_fp16")];
+            tensor<string, []> var_5950_equation_0 = const()[name = tensor<string, []>("op_5950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5950_cast_fp16 = einsum(equation = var_5950_equation_0, values = (var_5868_cast_fp16_0, var_5929_cast_fp16))[name = tensor<string, []>("op_5950_cast_fp16")];
+            tensor<string, []> var_5952_equation_0 = const()[name = tensor<string, []>("op_5952_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5952_cast_fp16 = einsum(equation = var_5952_equation_0, values = (var_5868_cast_fp16_1, var_5930_cast_fp16))[name = tensor<string, []>("op_5952_cast_fp16")];
+            tensor<string, []> var_5954_equation_0 = const()[name = tensor<string, []>("op_5954_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5954_cast_fp16 = einsum(equation = var_5954_equation_0, values = (var_5868_cast_fp16_2, var_5931_cast_fp16))[name = tensor<string, []>("op_5954_cast_fp16")];
+            tensor<string, []> var_5956_equation_0 = const()[name = tensor<string, []>("op_5956_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5956_cast_fp16 = einsum(equation = var_5956_equation_0, values = (var_5868_cast_fp16_3, var_5932_cast_fp16))[name = tensor<string, []>("op_5956_cast_fp16")];
+            tensor<string, []> var_5958_equation_0 = const()[name = tensor<string, []>("op_5958_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5958_cast_fp16 = einsum(equation = var_5958_equation_0, values = (var_5868_cast_fp16_4, var_5933_cast_fp16))[name = tensor<string, []>("op_5958_cast_fp16")];
+            tensor<string, []> var_5960_equation_0 = const()[name = tensor<string, []>("op_5960_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5960_cast_fp16 = einsum(equation = var_5960_equation_0, values = (var_5868_cast_fp16_5, var_5934_cast_fp16))[name = tensor<string, []>("op_5960_cast_fp16")];
+            tensor<string, []> var_5962_equation_0 = const()[name = tensor<string, []>("op_5962_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5962_cast_fp16 = einsum(equation = var_5962_equation_0, values = (var_5868_cast_fp16_6, var_5935_cast_fp16))[name = tensor<string, []>("op_5962_cast_fp16")];
+            tensor<string, []> var_5964_equation_0 = const()[name = tensor<string, []>("op_5964_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5964_cast_fp16 = einsum(equation = var_5964_equation_0, values = (var_5868_cast_fp16_7, var_5936_cast_fp16))[name = tensor<string, []>("op_5964_cast_fp16")];
+            tensor<string, []> var_5966_equation_0 = const()[name = tensor<string, []>("op_5966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5966_cast_fp16 = einsum(equation = var_5966_equation_0, values = (var_5868_cast_fp16_8, var_5937_cast_fp16))[name = tensor<string, []>("op_5966_cast_fp16")];
+            tensor<string, []> var_5968_equation_0 = const()[name = tensor<string, []>("op_5968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5968_cast_fp16 = einsum(equation = var_5968_equation_0, values = (var_5868_cast_fp16_9, var_5938_cast_fp16))[name = tensor<string, []>("op_5968_cast_fp16")];
+            tensor<string, []> var_5970_equation_0 = const()[name = tensor<string, []>("op_5970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5970_cast_fp16 = einsum(equation = var_5970_equation_0, values = (var_5868_cast_fp16_10, var_5939_cast_fp16))[name = tensor<string, []>("op_5970_cast_fp16")];
+            tensor<string, []> var_5972_equation_0 = const()[name = tensor<string, []>("op_5972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5972_cast_fp16 = einsum(equation = var_5972_equation_0, values = (var_5868_cast_fp16_11, var_5940_cast_fp16))[name = tensor<string, []>("op_5972_cast_fp16")];
+            tensor<string, []> var_5974_equation_0 = const()[name = tensor<string, []>("op_5974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5974_cast_fp16 = einsum(equation = var_5974_equation_0, values = (var_5868_cast_fp16_12, var_5941_cast_fp16))[name = tensor<string, []>("op_5974_cast_fp16")];
+            tensor<string, []> var_5976_equation_0 = const()[name = tensor<string, []>("op_5976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5976_cast_fp16 = einsum(equation = var_5976_equation_0, values = (var_5868_cast_fp16_13, var_5942_cast_fp16))[name = tensor<string, []>("op_5976_cast_fp16")];
+            tensor<string, []> var_5978_equation_0 = const()[name = tensor<string, []>("op_5978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5978_cast_fp16 = einsum(equation = var_5978_equation_0, values = (var_5868_cast_fp16_14, var_5943_cast_fp16))[name = tensor<string, []>("op_5978_cast_fp16")];
+            tensor<string, []> var_5980_equation_0 = const()[name = tensor<string, []>("op_5980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5980_cast_fp16 = einsum(equation = var_5980_equation_0, values = (var_5868_cast_fp16_15, var_5944_cast_fp16))[name = tensor<string, []>("op_5980_cast_fp16")];
+            tensor<string, []> var_5982_equation_0 = const()[name = tensor<string, []>("op_5982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5982_cast_fp16 = einsum(equation = var_5982_equation_0, values = (var_5868_cast_fp16_16, var_5945_cast_fp16))[name = tensor<string, []>("op_5982_cast_fp16")];
+            tensor<string, []> var_5984_equation_0 = const()[name = tensor<string, []>("op_5984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5984_cast_fp16 = einsum(equation = var_5984_equation_0, values = (var_5868_cast_fp16_17, var_5946_cast_fp16))[name = tensor<string, []>("op_5984_cast_fp16")];
+            tensor<string, []> var_5986_equation_0 = const()[name = tensor<string, []>("op_5986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5986_cast_fp16 = einsum(equation = var_5986_equation_0, values = (var_5868_cast_fp16_18, var_5947_cast_fp16))[name = tensor<string, []>("op_5986_cast_fp16")];
+            tensor<string, []> var_5988_equation_0 = const()[name = tensor<string, []>("op_5988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5988_cast_fp16 = einsum(equation = var_5988_equation_0, values = (var_5868_cast_fp16_19, var_5948_cast_fp16))[name = tensor<string, []>("op_5988_cast_fp16")];
+            tensor<bool, []> input_215_interleave_0 = const()[name = tensor<string, []>("input_215_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_215_cast_fp16 = concat(axis = var_5773, interleave = input_215_interleave_0, values = (var_5950_cast_fp16, var_5952_cast_fp16, var_5954_cast_fp16, var_5956_cast_fp16, var_5958_cast_fp16, var_5960_cast_fp16, var_5962_cast_fp16, var_5964_cast_fp16, var_5966_cast_fp16, var_5968_cast_fp16, var_5970_cast_fp16, var_5972_cast_fp16, var_5974_cast_fp16, var_5976_cast_fp16, var_5978_cast_fp16, var_5980_cast_fp16, var_5982_cast_fp16, var_5984_cast_fp16, var_5986_cast_fp16, var_5988_cast_fp16))[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<string, []> var_5997_pad_type_0 = const()[name = tensor<string, []>("op_5997_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5997_strides_0 = const()[name = tensor<string, []>("op_5997_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5997_pad_0 = const()[name = tensor<string, []>("op_5997_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5997_dilations_0 = const()[name = tensor<string, []>("op_5997_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5997_groups_0 = const()[name = tensor<string, []>("op_5997_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_21_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(850918912)))];
+            tensor<fp16, [1280]> blocks_21_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(854195776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5997_cast_fp16 = conv(bias = blocks_21_attn_out_bias_to_fp16, dilations = var_5997_dilations_0, groups = var_5997_groups_0, pad = var_5997_pad_0, pad_type = var_5997_pad_type_0, strides = var_5997_strides_0, weight = blocks_21_attn_out_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("op_5997_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = var_5997_cast_fp16)[name = tensor<string, []>("inputs_87_cast_fp16")];
+            tensor<int32, [1]> input_217_axes_0 = const()[name = tensor<string, []>("input_217_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_217_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_217_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(854198400)))];
+            tensor<fp16, [1280]> input_217_beta_0_to_fp16 = const()[name = tensor<string, []>("input_217_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(854201024)))];
+            tensor<fp16, []> var_6007_to_fp16 = const()[name = tensor<string, []>("op_6007_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_217_cast_fp16 = layer_norm(axes = input_217_axes_0, beta = input_217_beta_0_to_fp16, epsilon = var_6007_to_fp16, gamma = input_217_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_21_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(854203648)))];
+            tensor<fp16, [5120]> blocks_21_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(867310912)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_219_cast_fp16 = conv(bias = blocks_21_mlp_0_bias_to_fp16, dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = blocks_21_mlp_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
+            tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
+            tensor<string, []> var_6033_pad_type_0 = const()[name = tensor<string, []>("op_6033_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6033_strides_0 = const()[name = tensor<string, []>("op_6033_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6033_pad_0 = const()[name = tensor<string, []>("op_6033_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6033_dilations_0 = const()[name = tensor<string, []>("op_6033_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6033_groups_0 = const()[name = tensor<string, []>("op_6033_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_21_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(867321216)))];
+            tensor<fp16, [1280]> blocks_21_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880428480)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6033_cast_fp16 = conv(bias = blocks_21_mlp_2_bias_to_fp16, dilations = var_6033_dilations_0, groups = var_6033_groups_0, pad = var_6033_pad_0, pad_type = var_6033_pad_type_0, strides = var_6033_strides_0, weight = blocks_21_mlp_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("op_6033_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_6033_cast_fp16)[name = tensor<string, []>("inputs_89_cast_fp16")];
+            tensor<int32, []> var_6042 = const()[name = tensor<string, []>("op_6042"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_223_axes_0 = const()[name = tensor<string, []>("input_223_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_223_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_223_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880431104)))];
+            tensor<fp16, [1280]> input_223_beta_0_to_fp16 = const()[name = tensor<string, []>("input_223_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880433728)))];
+            tensor<fp16, []> var_6058_to_fp16 = const()[name = tensor<string, []>("op_6058_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = input_223_beta_0_to_fp16, epsilon = var_6058_to_fp16, gamma = input_223_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
+            tensor<string, []> q_45_pad_type_0 = const()[name = tensor<string, []>("q_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_45_strides_0 = const()[name = tensor<string, []>("q_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_45_pad_0 = const()[name = tensor<string, []>("q_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_45_dilations_0 = const()[name = tensor<string, []>("q_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_45_groups_0 = const()[name = tensor<string, []>("q_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6093_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6093_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(880436352)))];
+            tensor<fp16, [1280]> var_6093_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6093_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883713216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6093_cast_fp16 = conv(bias = var_6093_bias_0_to_fp16, dilations = q_45_dilations_0, groups = q_45_groups_0, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = q_45_strides_0, weight = var_6093_weight_0_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6093_cast_fp16")];
+            tensor<string, []> k_45_pad_type_0 = const()[name = tensor<string, []>("k_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_45_strides_0 = const()[name = tensor<string, []>("k_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_45_pad_0 = const()[name = tensor<string, []>("k_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_45_dilations_0 = const()[name = tensor<string, []>("k_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_45_groups_0 = const()[name = tensor<string, []>("k_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(883715840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_45_cast_fp16 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = blocks_22_attn_key_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
+            tensor<string, []> var_6091_pad_type_0 = const()[name = tensor<string, []>("op_6091_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6091_strides_0 = const()[name = tensor<string, []>("op_6091_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6091_pad_0 = const()[name = tensor<string, []>("op_6091_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6091_dilations_0 = const()[name = tensor<string, []>("op_6091_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6091_groups_0 = const()[name = tensor<string, []>("op_6091_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(886992704)))];
+            tensor<fp16, [1280]> blocks_22_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(890269568)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6091_cast_fp16 = conv(bias = blocks_22_attn_value_bias_to_fp16, dilations = var_6091_dilations_0, groups = var_6091_groups_0, pad = var_6091_pad_0, pad_type = var_6091_pad_type_0, strides = var_6091_strides_0, weight = blocks_22_attn_value_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_6091_cast_fp16")];
+            tensor<int32, [20]> tile_66 = const()[name = tensor<string, []>("tile_66"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6094_axis_0 = const()[name = tensor<string, []>("op_6094_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6094_cast_fp16_19 = split(axis = var_6094_axis_0, split_sizes = tile_66, x = var_6093_cast_fp16)[name = tensor<string, []>("op_6094_cast_fp16")];
+            tensor<int32, [4]> var_6115_perm_0 = const()[name = tensor<string, []>("op_6115_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_67 = const()[name = tensor<string, []>("tile_67"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6116_axis_0 = const()[name = tensor<string, []>("op_6116_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6115_cast_fp16 = transpose(perm = var_6115_perm_0, x = k_45_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6116_cast_fp16_19 = split(axis = var_6116_axis_0, split_sizes = tile_67, x = var_6115_cast_fp16)[name = tensor<string, []>("op_6116_cast_fp16")];
+            tensor<int32, [20]> tile_68 = const()[name = tensor<string, []>("tile_68"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6137_axis_0 = const()[name = tensor<string, []>("op_6137_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6137_cast_fp16_19 = split(axis = var_6137_axis_0, split_sizes = tile_68, x = var_6091_cast_fp16)[name = tensor<string, []>("op_6137_cast_fp16")];
+            tensor<string, []> aw_881_equation_0 = const()[name = tensor<string, []>("aw_881_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_881_cast_fp16 = einsum(equation = aw_881_equation_0, values = (var_6116_cast_fp16_0, var_6094_cast_fp16_0))[name = tensor<string, []>("aw_881_cast_fp16")];
+            tensor<string, []> aw_883_equation_0 = const()[name = tensor<string, []>("aw_883_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_883_cast_fp16 = einsum(equation = aw_883_equation_0, values = (var_6116_cast_fp16_1, var_6094_cast_fp16_1))[name = tensor<string, []>("aw_883_cast_fp16")];
+            tensor<string, []> aw_885_equation_0 = const()[name = tensor<string, []>("aw_885_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_885_cast_fp16 = einsum(equation = aw_885_equation_0, values = (var_6116_cast_fp16_2, var_6094_cast_fp16_2))[name = tensor<string, []>("aw_885_cast_fp16")];
+            tensor<string, []> aw_887_equation_0 = const()[name = tensor<string, []>("aw_887_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_887_cast_fp16 = einsum(equation = aw_887_equation_0, values = (var_6116_cast_fp16_3, var_6094_cast_fp16_3))[name = tensor<string, []>("aw_887_cast_fp16")];
+            tensor<string, []> aw_889_equation_0 = const()[name = tensor<string, []>("aw_889_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_889_cast_fp16 = einsum(equation = aw_889_equation_0, values = (var_6116_cast_fp16_4, var_6094_cast_fp16_4))[name = tensor<string, []>("aw_889_cast_fp16")];
+            tensor<string, []> aw_891_equation_0 = const()[name = tensor<string, []>("aw_891_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_891_cast_fp16 = einsum(equation = aw_891_equation_0, values = (var_6116_cast_fp16_5, var_6094_cast_fp16_5))[name = tensor<string, []>("aw_891_cast_fp16")];
+            tensor<string, []> aw_893_equation_0 = const()[name = tensor<string, []>("aw_893_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_893_cast_fp16 = einsum(equation = aw_893_equation_0, values = (var_6116_cast_fp16_6, var_6094_cast_fp16_6))[name = tensor<string, []>("aw_893_cast_fp16")];
+            tensor<string, []> aw_895_equation_0 = const()[name = tensor<string, []>("aw_895_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_895_cast_fp16 = einsum(equation = aw_895_equation_0, values = (var_6116_cast_fp16_7, var_6094_cast_fp16_7))[name = tensor<string, []>("aw_895_cast_fp16")];
+            tensor<string, []> aw_897_equation_0 = const()[name = tensor<string, []>("aw_897_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_897_cast_fp16 = einsum(equation = aw_897_equation_0, values = (var_6116_cast_fp16_8, var_6094_cast_fp16_8))[name = tensor<string, []>("aw_897_cast_fp16")];
+            tensor<string, []> aw_899_equation_0 = const()[name = tensor<string, []>("aw_899_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_899_cast_fp16 = einsum(equation = aw_899_equation_0, values = (var_6116_cast_fp16_9, var_6094_cast_fp16_9))[name = tensor<string, []>("aw_899_cast_fp16")];
+            tensor<string, []> aw_901_equation_0 = const()[name = tensor<string, []>("aw_901_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_901_cast_fp16 = einsum(equation = aw_901_equation_0, values = (var_6116_cast_fp16_10, var_6094_cast_fp16_10))[name = tensor<string, []>("aw_901_cast_fp16")];
+            tensor<string, []> aw_903_equation_0 = const()[name = tensor<string, []>("aw_903_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_903_cast_fp16 = einsum(equation = aw_903_equation_0, values = (var_6116_cast_fp16_11, var_6094_cast_fp16_11))[name = tensor<string, []>("aw_903_cast_fp16")];
+            tensor<string, []> aw_905_equation_0 = const()[name = tensor<string, []>("aw_905_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_905_cast_fp16 = einsum(equation = aw_905_equation_0, values = (var_6116_cast_fp16_12, var_6094_cast_fp16_12))[name = tensor<string, []>("aw_905_cast_fp16")];
+            tensor<string, []> aw_907_equation_0 = const()[name = tensor<string, []>("aw_907_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_907_cast_fp16 = einsum(equation = aw_907_equation_0, values = (var_6116_cast_fp16_13, var_6094_cast_fp16_13))[name = tensor<string, []>("aw_907_cast_fp16")];
+            tensor<string, []> aw_909_equation_0 = const()[name = tensor<string, []>("aw_909_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_909_cast_fp16 = einsum(equation = aw_909_equation_0, values = (var_6116_cast_fp16_14, var_6094_cast_fp16_14))[name = tensor<string, []>("aw_909_cast_fp16")];
+            tensor<string, []> aw_911_equation_0 = const()[name = tensor<string, []>("aw_911_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_911_cast_fp16 = einsum(equation = aw_911_equation_0, values = (var_6116_cast_fp16_15, var_6094_cast_fp16_15))[name = tensor<string, []>("aw_911_cast_fp16")];
+            tensor<string, []> aw_913_equation_0 = const()[name = tensor<string, []>("aw_913_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_913_cast_fp16 = einsum(equation = aw_913_equation_0, values = (var_6116_cast_fp16_16, var_6094_cast_fp16_16))[name = tensor<string, []>("aw_913_cast_fp16")];
+            tensor<string, []> aw_915_equation_0 = const()[name = tensor<string, []>("aw_915_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_915_cast_fp16 = einsum(equation = aw_915_equation_0, values = (var_6116_cast_fp16_17, var_6094_cast_fp16_17))[name = tensor<string, []>("aw_915_cast_fp16")];
+            tensor<string, []> aw_917_equation_0 = const()[name = tensor<string, []>("aw_917_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_917_cast_fp16 = einsum(equation = aw_917_equation_0, values = (var_6116_cast_fp16_18, var_6094_cast_fp16_18))[name = tensor<string, []>("aw_917_cast_fp16")];
+            tensor<string, []> aw_919_equation_0 = const()[name = tensor<string, []>("aw_919_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_919_cast_fp16 = einsum(equation = aw_919_equation_0, values = (var_6116_cast_fp16_19, var_6094_cast_fp16_19))[name = tensor<string, []>("aw_919_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6198_cast_fp16 = softmax(axis = var_6042, x = aw_881_cast_fp16)[name = tensor<string, []>("op_6198_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6199_cast_fp16 = softmax(axis = var_6042, x = aw_883_cast_fp16)[name = tensor<string, []>("op_6199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6200_cast_fp16 = softmax(axis = var_6042, x = aw_885_cast_fp16)[name = tensor<string, []>("op_6200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6201_cast_fp16 = softmax(axis = var_6042, x = aw_887_cast_fp16)[name = tensor<string, []>("op_6201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6202_cast_fp16 = softmax(axis = var_6042, x = aw_889_cast_fp16)[name = tensor<string, []>("op_6202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6203_cast_fp16 = softmax(axis = var_6042, x = aw_891_cast_fp16)[name = tensor<string, []>("op_6203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6204_cast_fp16 = softmax(axis = var_6042, x = aw_893_cast_fp16)[name = tensor<string, []>("op_6204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6205_cast_fp16 = softmax(axis = var_6042, x = aw_895_cast_fp16)[name = tensor<string, []>("op_6205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6206_cast_fp16 = softmax(axis = var_6042, x = aw_897_cast_fp16)[name = tensor<string, []>("op_6206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6207_cast_fp16 = softmax(axis = var_6042, x = aw_899_cast_fp16)[name = tensor<string, []>("op_6207_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6208_cast_fp16 = softmax(axis = var_6042, x = aw_901_cast_fp16)[name = tensor<string, []>("op_6208_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6209_cast_fp16 = softmax(axis = var_6042, x = aw_903_cast_fp16)[name = tensor<string, []>("op_6209_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6210_cast_fp16 = softmax(axis = var_6042, x = aw_905_cast_fp16)[name = tensor<string, []>("op_6210_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6211_cast_fp16 = softmax(axis = var_6042, x = aw_907_cast_fp16)[name = tensor<string, []>("op_6211_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6212_cast_fp16 = softmax(axis = var_6042, x = aw_909_cast_fp16)[name = tensor<string, []>("op_6212_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6213_cast_fp16 = softmax(axis = var_6042, x = aw_911_cast_fp16)[name = tensor<string, []>("op_6213_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6214_cast_fp16 = softmax(axis = var_6042, x = aw_913_cast_fp16)[name = tensor<string, []>("op_6214_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6215_cast_fp16 = softmax(axis = var_6042, x = aw_915_cast_fp16)[name = tensor<string, []>("op_6215_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6216_cast_fp16 = softmax(axis = var_6042, x = aw_917_cast_fp16)[name = tensor<string, []>("op_6216_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6217_cast_fp16 = softmax(axis = var_6042, x = aw_919_cast_fp16)[name = tensor<string, []>("op_6217_cast_fp16")];
+            tensor<string, []> var_6219_equation_0 = const()[name = tensor<string, []>("op_6219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6219_cast_fp16 = einsum(equation = var_6219_equation_0, values = (var_6137_cast_fp16_0, var_6198_cast_fp16))[name = tensor<string, []>("op_6219_cast_fp16")];
+            tensor<string, []> var_6221_equation_0 = const()[name = tensor<string, []>("op_6221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6221_cast_fp16 = einsum(equation = var_6221_equation_0, values = (var_6137_cast_fp16_1, var_6199_cast_fp16))[name = tensor<string, []>("op_6221_cast_fp16")];
+            tensor<string, []> var_6223_equation_0 = const()[name = tensor<string, []>("op_6223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6223_cast_fp16 = einsum(equation = var_6223_equation_0, values = (var_6137_cast_fp16_2, var_6200_cast_fp16))[name = tensor<string, []>("op_6223_cast_fp16")];
+            tensor<string, []> var_6225_equation_0 = const()[name = tensor<string, []>("op_6225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6225_cast_fp16 = einsum(equation = var_6225_equation_0, values = (var_6137_cast_fp16_3, var_6201_cast_fp16))[name = tensor<string, []>("op_6225_cast_fp16")];
+            tensor<string, []> var_6227_equation_0 = const()[name = tensor<string, []>("op_6227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6227_cast_fp16 = einsum(equation = var_6227_equation_0, values = (var_6137_cast_fp16_4, var_6202_cast_fp16))[name = tensor<string, []>("op_6227_cast_fp16")];
+            tensor<string, []> var_6229_equation_0 = const()[name = tensor<string, []>("op_6229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6229_cast_fp16 = einsum(equation = var_6229_equation_0, values = (var_6137_cast_fp16_5, var_6203_cast_fp16))[name = tensor<string, []>("op_6229_cast_fp16")];
+            tensor<string, []> var_6231_equation_0 = const()[name = tensor<string, []>("op_6231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6231_cast_fp16 = einsum(equation = var_6231_equation_0, values = (var_6137_cast_fp16_6, var_6204_cast_fp16))[name = tensor<string, []>("op_6231_cast_fp16")];
+            tensor<string, []> var_6233_equation_0 = const()[name = tensor<string, []>("op_6233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6233_cast_fp16 = einsum(equation = var_6233_equation_0, values = (var_6137_cast_fp16_7, var_6205_cast_fp16))[name = tensor<string, []>("op_6233_cast_fp16")];
+            tensor<string, []> var_6235_equation_0 = const()[name = tensor<string, []>("op_6235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6235_cast_fp16 = einsum(equation = var_6235_equation_0, values = (var_6137_cast_fp16_8, var_6206_cast_fp16))[name = tensor<string, []>("op_6235_cast_fp16")];
+            tensor<string, []> var_6237_equation_0 = const()[name = tensor<string, []>("op_6237_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6237_cast_fp16 = einsum(equation = var_6237_equation_0, values = (var_6137_cast_fp16_9, var_6207_cast_fp16))[name = tensor<string, []>("op_6237_cast_fp16")];
+            tensor<string, []> var_6239_equation_0 = const()[name = tensor<string, []>("op_6239_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6239_cast_fp16 = einsum(equation = var_6239_equation_0, values = (var_6137_cast_fp16_10, var_6208_cast_fp16))[name = tensor<string, []>("op_6239_cast_fp16")];
+            tensor<string, []> var_6241_equation_0 = const()[name = tensor<string, []>("op_6241_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6241_cast_fp16 = einsum(equation = var_6241_equation_0, values = (var_6137_cast_fp16_11, var_6209_cast_fp16))[name = tensor<string, []>("op_6241_cast_fp16")];
+            tensor<string, []> var_6243_equation_0 = const()[name = tensor<string, []>("op_6243_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6243_cast_fp16 = einsum(equation = var_6243_equation_0, values = (var_6137_cast_fp16_12, var_6210_cast_fp16))[name = tensor<string, []>("op_6243_cast_fp16")];
+            tensor<string, []> var_6245_equation_0 = const()[name = tensor<string, []>("op_6245_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6245_cast_fp16 = einsum(equation = var_6245_equation_0, values = (var_6137_cast_fp16_13, var_6211_cast_fp16))[name = tensor<string, []>("op_6245_cast_fp16")];
+            tensor<string, []> var_6247_equation_0 = const()[name = tensor<string, []>("op_6247_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6247_cast_fp16 = einsum(equation = var_6247_equation_0, values = (var_6137_cast_fp16_14, var_6212_cast_fp16))[name = tensor<string, []>("op_6247_cast_fp16")];
+            tensor<string, []> var_6249_equation_0 = const()[name = tensor<string, []>("op_6249_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6249_cast_fp16 = einsum(equation = var_6249_equation_0, values = (var_6137_cast_fp16_15, var_6213_cast_fp16))[name = tensor<string, []>("op_6249_cast_fp16")];
+            tensor<string, []> var_6251_equation_0 = const()[name = tensor<string, []>("op_6251_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6251_cast_fp16 = einsum(equation = var_6251_equation_0, values = (var_6137_cast_fp16_16, var_6214_cast_fp16))[name = tensor<string, []>("op_6251_cast_fp16")];
+            tensor<string, []> var_6253_equation_0 = const()[name = tensor<string, []>("op_6253_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6253_cast_fp16 = einsum(equation = var_6253_equation_0, values = (var_6137_cast_fp16_17, var_6215_cast_fp16))[name = tensor<string, []>("op_6253_cast_fp16")];
+            tensor<string, []> var_6255_equation_0 = const()[name = tensor<string, []>("op_6255_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6255_cast_fp16 = einsum(equation = var_6255_equation_0, values = (var_6137_cast_fp16_18, var_6216_cast_fp16))[name = tensor<string, []>("op_6255_cast_fp16")];
+            tensor<string, []> var_6257_equation_0 = const()[name = tensor<string, []>("op_6257_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6257_cast_fp16 = einsum(equation = var_6257_equation_0, values = (var_6137_cast_fp16_19, var_6217_cast_fp16))[name = tensor<string, []>("op_6257_cast_fp16")];
+            tensor<bool, []> input_225_interleave_0 = const()[name = tensor<string, []>("input_225_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_225_cast_fp16 = concat(axis = var_6042, interleave = input_225_interleave_0, values = (var_6219_cast_fp16, var_6221_cast_fp16, var_6223_cast_fp16, var_6225_cast_fp16, var_6227_cast_fp16, var_6229_cast_fp16, var_6231_cast_fp16, var_6233_cast_fp16, var_6235_cast_fp16, var_6237_cast_fp16, var_6239_cast_fp16, var_6241_cast_fp16, var_6243_cast_fp16, var_6245_cast_fp16, var_6247_cast_fp16, var_6249_cast_fp16, var_6251_cast_fp16, var_6253_cast_fp16, var_6255_cast_fp16, var_6257_cast_fp16))[name = tensor<string, []>("input_225_cast_fp16")];
+            tensor<string, []> var_6266_pad_type_0 = const()[name = tensor<string, []>("op_6266_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6266_strides_0 = const()[name = tensor<string, []>("op_6266_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6266_pad_0 = const()[name = tensor<string, []>("op_6266_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6266_dilations_0 = const()[name = tensor<string, []>("op_6266_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6266_groups_0 = const()[name = tensor<string, []>("op_6266_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_22_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(890272192)))];
+            tensor<fp16, [1280]> blocks_22_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893549056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6266_cast_fp16 = conv(bias = blocks_22_attn_out_bias_to_fp16, dilations = var_6266_dilations_0, groups = var_6266_groups_0, pad = var_6266_pad_0, pad_type = var_6266_pad_type_0, strides = var_6266_strides_0, weight = blocks_22_attn_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("op_6266_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = var_6266_cast_fp16)[name = tensor<string, []>("inputs_91_cast_fp16")];
+            tensor<int32, [1]> input_227_axes_0 = const()[name = tensor<string, []>("input_227_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_227_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893551680)))];
+            tensor<fp16, [1280]> input_227_beta_0_to_fp16 = const()[name = tensor<string, []>("input_227_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893554304)))];
+            tensor<fp16, []> var_6276_to_fp16 = const()[name = tensor<string, []>("op_6276_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = input_227_beta_0_to_fp16, epsilon = var_6276_to_fp16, gamma = input_227_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<string, []> input_229_pad_type_0 = const()[name = tensor<string, []>("input_229_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_229_strides_0 = const()[name = tensor<string, []>("input_229_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_229_pad_0 = const()[name = tensor<string, []>("input_229_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_229_dilations_0 = const()[name = tensor<string, []>("input_229_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_229_groups_0 = const()[name = tensor<string, []>("input_229_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_22_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(893556928)))];
+            tensor<fp16, [5120]> blocks_22_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906664192)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_229_cast_fp16 = conv(bias = blocks_22_mlp_0_bias_to_fp16, dilations = input_229_dilations_0, groups = input_229_groups_0, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = input_229_strides_0, weight = blocks_22_mlp_0_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<string, []> input_231_mode_0 = const()[name = tensor<string, []>("input_231_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<string, []> var_6302_pad_type_0 = const()[name = tensor<string, []>("op_6302_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6302_strides_0 = const()[name = tensor<string, []>("op_6302_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6302_pad_0 = const()[name = tensor<string, []>("op_6302_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6302_dilations_0 = const()[name = tensor<string, []>("op_6302_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6302_groups_0 = const()[name = tensor<string, []>("op_6302_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_22_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(906674496)))];
+            tensor<fp16, [1280]> blocks_22_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919781760)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6302_cast_fp16 = conv(bias = blocks_22_mlp_2_bias_to_fp16, dilations = var_6302_dilations_0, groups = var_6302_groups_0, pad = var_6302_pad_0, pad_type = var_6302_pad_type_0, strides = var_6302_strides_0, weight = blocks_22_mlp_2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor<string, []>("op_6302_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = var_6302_cast_fp16)[name = tensor<string, []>("inputs_93_cast_fp16")];
+            tensor<int32, []> var_6311 = const()[name = tensor<string, []>("op_6311"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_233_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_233_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919784384)))];
+            tensor<fp16, [1280]> input_233_beta_0_to_fp16 = const()[name = tensor<string, []>("input_233_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919787008)))];
+            tensor<fp16, []> var_6327_to_fp16 = const()[name = tensor<string, []>("op_6327_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = input_233_beta_0_to_fp16, epsilon = var_6327_to_fp16, gamma = input_233_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<string, []> q_47_pad_type_0 = const()[name = tensor<string, []>("q_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_47_strides_0 = const()[name = tensor<string, []>("q_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_47_pad_0 = const()[name = tensor<string, []>("q_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_47_dilations_0 = const()[name = tensor<string, []>("q_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_47_groups_0 = const()[name = tensor<string, []>("q_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6362_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6362_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(919789632)))];
+            tensor<fp16, [1280]> var_6362_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6362_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(923066496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6362_cast_fp16 = conv(bias = var_6362_bias_0_to_fp16, dilations = q_47_dilations_0, groups = q_47_groups_0, pad = q_47_pad_0, pad_type = q_47_pad_type_0, strides = q_47_strides_0, weight = var_6362_weight_0_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6362_cast_fp16")];
+            tensor<string, []> k_47_pad_type_0 = const()[name = tensor<string, []>("k_47_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_47_strides_0 = const()[name = tensor<string, []>("k_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_47_pad_0 = const()[name = tensor<string, []>("k_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_47_dilations_0 = const()[name = tensor<string, []>("k_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_47_groups_0 = const()[name = tensor<string, []>("k_47_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(923069120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_47_cast_fp16 = conv(dilations = k_47_dilations_0, groups = k_47_groups_0, pad = k_47_pad_0, pad_type = k_47_pad_type_0, strides = k_47_strides_0, weight = blocks_23_attn_key_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("k_47_cast_fp16")];
+            tensor<string, []> var_6360_pad_type_0 = const()[name = tensor<string, []>("op_6360_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6360_strides_0 = const()[name = tensor<string, []>("op_6360_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6360_pad_0 = const()[name = tensor<string, []>("op_6360_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6360_dilations_0 = const()[name = tensor<string, []>("op_6360_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6360_groups_0 = const()[name = tensor<string, []>("op_6360_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(926345984)))];
+            tensor<fp16, [1280]> blocks_23_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929622848)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6360_cast_fp16 = conv(bias = blocks_23_attn_value_bias_to_fp16, dilations = var_6360_dilations_0, groups = var_6360_groups_0, pad = var_6360_pad_0, pad_type = var_6360_pad_type_0, strides = var_6360_strides_0, weight = blocks_23_attn_value_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_6360_cast_fp16")];
+            tensor<int32, [20]> tile_69 = const()[name = tensor<string, []>("tile_69"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6363_axis_0 = const()[name = tensor<string, []>("op_6363_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6363_cast_fp16_19 = split(axis = var_6363_axis_0, split_sizes = tile_69, x = var_6362_cast_fp16)[name = tensor<string, []>("op_6363_cast_fp16")];
+            tensor<int32, [4]> var_6384_perm_0 = const()[name = tensor<string, []>("op_6384_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_70 = const()[name = tensor<string, []>("tile_70"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6385_axis_0 = const()[name = tensor<string, []>("op_6385_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6384_cast_fp16 = transpose(perm = var_6384_perm_0, x = k_47_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6385_cast_fp16_19 = split(axis = var_6385_axis_0, split_sizes = tile_70, x = var_6384_cast_fp16)[name = tensor<string, []>("op_6385_cast_fp16")];
+            tensor<int32, [20]> tile_71 = const()[name = tensor<string, []>("tile_71"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6406_axis_0 = const()[name = tensor<string, []>("op_6406_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6406_cast_fp16_19 = split(axis = var_6406_axis_0, split_sizes = tile_71, x = var_6360_cast_fp16)[name = tensor<string, []>("op_6406_cast_fp16")];
+            tensor<string, []> aw_921_equation_0 = const()[name = tensor<string, []>("aw_921_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_921_cast_fp16 = einsum(equation = aw_921_equation_0, values = (var_6385_cast_fp16_0, var_6363_cast_fp16_0))[name = tensor<string, []>("aw_921_cast_fp16")];
+            tensor<string, []> aw_923_equation_0 = const()[name = tensor<string, []>("aw_923_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_923_cast_fp16 = einsum(equation = aw_923_equation_0, values = (var_6385_cast_fp16_1, var_6363_cast_fp16_1))[name = tensor<string, []>("aw_923_cast_fp16")];
+            tensor<string, []> aw_925_equation_0 = const()[name = tensor<string, []>("aw_925_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_925_cast_fp16 = einsum(equation = aw_925_equation_0, values = (var_6385_cast_fp16_2, var_6363_cast_fp16_2))[name = tensor<string, []>("aw_925_cast_fp16")];
+            tensor<string, []> aw_927_equation_0 = const()[name = tensor<string, []>("aw_927_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_927_cast_fp16 = einsum(equation = aw_927_equation_0, values = (var_6385_cast_fp16_3, var_6363_cast_fp16_3))[name = tensor<string, []>("aw_927_cast_fp16")];
+            tensor<string, []> aw_929_equation_0 = const()[name = tensor<string, []>("aw_929_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_929_cast_fp16 = einsum(equation = aw_929_equation_0, values = (var_6385_cast_fp16_4, var_6363_cast_fp16_4))[name = tensor<string, []>("aw_929_cast_fp16")];
+            tensor<string, []> aw_931_equation_0 = const()[name = tensor<string, []>("aw_931_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_931_cast_fp16 = einsum(equation = aw_931_equation_0, values = (var_6385_cast_fp16_5, var_6363_cast_fp16_5))[name = tensor<string, []>("aw_931_cast_fp16")];
+            tensor<string, []> aw_933_equation_0 = const()[name = tensor<string, []>("aw_933_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_933_cast_fp16 = einsum(equation = aw_933_equation_0, values = (var_6385_cast_fp16_6, var_6363_cast_fp16_6))[name = tensor<string, []>("aw_933_cast_fp16")];
+            tensor<string, []> aw_935_equation_0 = const()[name = tensor<string, []>("aw_935_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_935_cast_fp16 = einsum(equation = aw_935_equation_0, values = (var_6385_cast_fp16_7, var_6363_cast_fp16_7))[name = tensor<string, []>("aw_935_cast_fp16")];
+            tensor<string, []> aw_937_equation_0 = const()[name = tensor<string, []>("aw_937_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_937_cast_fp16 = einsum(equation = aw_937_equation_0, values = (var_6385_cast_fp16_8, var_6363_cast_fp16_8))[name = tensor<string, []>("aw_937_cast_fp16")];
+            tensor<string, []> aw_939_equation_0 = const()[name = tensor<string, []>("aw_939_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_939_cast_fp16 = einsum(equation = aw_939_equation_0, values = (var_6385_cast_fp16_9, var_6363_cast_fp16_9))[name = tensor<string, []>("aw_939_cast_fp16")];
+            tensor<string, []> aw_941_equation_0 = const()[name = tensor<string, []>("aw_941_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_941_cast_fp16 = einsum(equation = aw_941_equation_0, values = (var_6385_cast_fp16_10, var_6363_cast_fp16_10))[name = tensor<string, []>("aw_941_cast_fp16")];
+            tensor<string, []> aw_943_equation_0 = const()[name = tensor<string, []>("aw_943_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_943_cast_fp16 = einsum(equation = aw_943_equation_0, values = (var_6385_cast_fp16_11, var_6363_cast_fp16_11))[name = tensor<string, []>("aw_943_cast_fp16")];
+            tensor<string, []> aw_945_equation_0 = const()[name = tensor<string, []>("aw_945_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_945_cast_fp16 = einsum(equation = aw_945_equation_0, values = (var_6385_cast_fp16_12, var_6363_cast_fp16_12))[name = tensor<string, []>("aw_945_cast_fp16")];
+            tensor<string, []> aw_947_equation_0 = const()[name = tensor<string, []>("aw_947_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_947_cast_fp16 = einsum(equation = aw_947_equation_0, values = (var_6385_cast_fp16_13, var_6363_cast_fp16_13))[name = tensor<string, []>("aw_947_cast_fp16")];
+            tensor<string, []> aw_949_equation_0 = const()[name = tensor<string, []>("aw_949_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_949_cast_fp16 = einsum(equation = aw_949_equation_0, values = (var_6385_cast_fp16_14, var_6363_cast_fp16_14))[name = tensor<string, []>("aw_949_cast_fp16")];
+            tensor<string, []> aw_951_equation_0 = const()[name = tensor<string, []>("aw_951_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_951_cast_fp16 = einsum(equation = aw_951_equation_0, values = (var_6385_cast_fp16_15, var_6363_cast_fp16_15))[name = tensor<string, []>("aw_951_cast_fp16")];
+            tensor<string, []> aw_953_equation_0 = const()[name = tensor<string, []>("aw_953_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_953_cast_fp16 = einsum(equation = aw_953_equation_0, values = (var_6385_cast_fp16_16, var_6363_cast_fp16_16))[name = tensor<string, []>("aw_953_cast_fp16")];
+            tensor<string, []> aw_955_equation_0 = const()[name = tensor<string, []>("aw_955_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_955_cast_fp16 = einsum(equation = aw_955_equation_0, values = (var_6385_cast_fp16_17, var_6363_cast_fp16_17))[name = tensor<string, []>("aw_955_cast_fp16")];
+            tensor<string, []> aw_957_equation_0 = const()[name = tensor<string, []>("aw_957_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_957_cast_fp16 = einsum(equation = aw_957_equation_0, values = (var_6385_cast_fp16_18, var_6363_cast_fp16_18))[name = tensor<string, []>("aw_957_cast_fp16")];
+            tensor<string, []> aw_959_equation_0 = const()[name = tensor<string, []>("aw_959_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_959_cast_fp16 = einsum(equation = aw_959_equation_0, values = (var_6385_cast_fp16_19, var_6363_cast_fp16_19))[name = tensor<string, []>("aw_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6467_cast_fp16 = softmax(axis = var_6311, x = aw_921_cast_fp16)[name = tensor<string, []>("op_6467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6468_cast_fp16 = softmax(axis = var_6311, x = aw_923_cast_fp16)[name = tensor<string, []>("op_6468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6469_cast_fp16 = softmax(axis = var_6311, x = aw_925_cast_fp16)[name = tensor<string, []>("op_6469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6470_cast_fp16 = softmax(axis = var_6311, x = aw_927_cast_fp16)[name = tensor<string, []>("op_6470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6471_cast_fp16 = softmax(axis = var_6311, x = aw_929_cast_fp16)[name = tensor<string, []>("op_6471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6472_cast_fp16 = softmax(axis = var_6311, x = aw_931_cast_fp16)[name = tensor<string, []>("op_6472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6473_cast_fp16 = softmax(axis = var_6311, x = aw_933_cast_fp16)[name = tensor<string, []>("op_6473_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6474_cast_fp16 = softmax(axis = var_6311, x = aw_935_cast_fp16)[name = tensor<string, []>("op_6474_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6475_cast_fp16 = softmax(axis = var_6311, x = aw_937_cast_fp16)[name = tensor<string, []>("op_6475_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6476_cast_fp16 = softmax(axis = var_6311, x = aw_939_cast_fp16)[name = tensor<string, []>("op_6476_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6477_cast_fp16 = softmax(axis = var_6311, x = aw_941_cast_fp16)[name = tensor<string, []>("op_6477_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6478_cast_fp16 = softmax(axis = var_6311, x = aw_943_cast_fp16)[name = tensor<string, []>("op_6478_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6479_cast_fp16 = softmax(axis = var_6311, x = aw_945_cast_fp16)[name = tensor<string, []>("op_6479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6480_cast_fp16 = softmax(axis = var_6311, x = aw_947_cast_fp16)[name = tensor<string, []>("op_6480_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6481_cast_fp16 = softmax(axis = var_6311, x = aw_949_cast_fp16)[name = tensor<string, []>("op_6481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6482_cast_fp16 = softmax(axis = var_6311, x = aw_951_cast_fp16)[name = tensor<string, []>("op_6482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6483_cast_fp16 = softmax(axis = var_6311, x = aw_953_cast_fp16)[name = tensor<string, []>("op_6483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6484_cast_fp16 = softmax(axis = var_6311, x = aw_955_cast_fp16)[name = tensor<string, []>("op_6484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6485_cast_fp16 = softmax(axis = var_6311, x = aw_957_cast_fp16)[name = tensor<string, []>("op_6485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6486_cast_fp16 = softmax(axis = var_6311, x = aw_959_cast_fp16)[name = tensor<string, []>("op_6486_cast_fp16")];
+            tensor<string, []> var_6488_equation_0 = const()[name = tensor<string, []>("op_6488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6488_cast_fp16 = einsum(equation = var_6488_equation_0, values = (var_6406_cast_fp16_0, var_6467_cast_fp16))[name = tensor<string, []>("op_6488_cast_fp16")];
+            tensor<string, []> var_6490_equation_0 = const()[name = tensor<string, []>("op_6490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6490_cast_fp16 = einsum(equation = var_6490_equation_0, values = (var_6406_cast_fp16_1, var_6468_cast_fp16))[name = tensor<string, []>("op_6490_cast_fp16")];
+            tensor<string, []> var_6492_equation_0 = const()[name = tensor<string, []>("op_6492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6492_cast_fp16 = einsum(equation = var_6492_equation_0, values = (var_6406_cast_fp16_2, var_6469_cast_fp16))[name = tensor<string, []>("op_6492_cast_fp16")];
+            tensor<string, []> var_6494_equation_0 = const()[name = tensor<string, []>("op_6494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6494_cast_fp16 = einsum(equation = var_6494_equation_0, values = (var_6406_cast_fp16_3, var_6470_cast_fp16))[name = tensor<string, []>("op_6494_cast_fp16")];
+            tensor<string, []> var_6496_equation_0 = const()[name = tensor<string, []>("op_6496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6496_cast_fp16 = einsum(equation = var_6496_equation_0, values = (var_6406_cast_fp16_4, var_6471_cast_fp16))[name = tensor<string, []>("op_6496_cast_fp16")];
+            tensor<string, []> var_6498_equation_0 = const()[name = tensor<string, []>("op_6498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6498_cast_fp16 = einsum(equation = var_6498_equation_0, values = (var_6406_cast_fp16_5, var_6472_cast_fp16))[name = tensor<string, []>("op_6498_cast_fp16")];
+            tensor<string, []> var_6500_equation_0 = const()[name = tensor<string, []>("op_6500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6500_cast_fp16 = einsum(equation = var_6500_equation_0, values = (var_6406_cast_fp16_6, var_6473_cast_fp16))[name = tensor<string, []>("op_6500_cast_fp16")];
+            tensor<string, []> var_6502_equation_0 = const()[name = tensor<string, []>("op_6502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6502_cast_fp16 = einsum(equation = var_6502_equation_0, values = (var_6406_cast_fp16_7, var_6474_cast_fp16))[name = tensor<string, []>("op_6502_cast_fp16")];
+            tensor<string, []> var_6504_equation_0 = const()[name = tensor<string, []>("op_6504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6504_cast_fp16 = einsum(equation = var_6504_equation_0, values = (var_6406_cast_fp16_8, var_6475_cast_fp16))[name = tensor<string, []>("op_6504_cast_fp16")];
+            tensor<string, []> var_6506_equation_0 = const()[name = tensor<string, []>("op_6506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6506_cast_fp16 = einsum(equation = var_6506_equation_0, values = (var_6406_cast_fp16_9, var_6476_cast_fp16))[name = tensor<string, []>("op_6506_cast_fp16")];
+            tensor<string, []> var_6508_equation_0 = const()[name = tensor<string, []>("op_6508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6508_cast_fp16 = einsum(equation = var_6508_equation_0, values = (var_6406_cast_fp16_10, var_6477_cast_fp16))[name = tensor<string, []>("op_6508_cast_fp16")];
+            tensor<string, []> var_6510_equation_0 = const()[name = tensor<string, []>("op_6510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6510_cast_fp16 = einsum(equation = var_6510_equation_0, values = (var_6406_cast_fp16_11, var_6478_cast_fp16))[name = tensor<string, []>("op_6510_cast_fp16")];
+            tensor<string, []> var_6512_equation_0 = const()[name = tensor<string, []>("op_6512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6512_cast_fp16 = einsum(equation = var_6512_equation_0, values = (var_6406_cast_fp16_12, var_6479_cast_fp16))[name = tensor<string, []>("op_6512_cast_fp16")];
+            tensor<string, []> var_6514_equation_0 = const()[name = tensor<string, []>("op_6514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6514_cast_fp16 = einsum(equation = var_6514_equation_0, values = (var_6406_cast_fp16_13, var_6480_cast_fp16))[name = tensor<string, []>("op_6514_cast_fp16")];
+            tensor<string, []> var_6516_equation_0 = const()[name = tensor<string, []>("op_6516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6516_cast_fp16 = einsum(equation = var_6516_equation_0, values = (var_6406_cast_fp16_14, var_6481_cast_fp16))[name = tensor<string, []>("op_6516_cast_fp16")];
+            tensor<string, []> var_6518_equation_0 = const()[name = tensor<string, []>("op_6518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6518_cast_fp16 = einsum(equation = var_6518_equation_0, values = (var_6406_cast_fp16_15, var_6482_cast_fp16))[name = tensor<string, []>("op_6518_cast_fp16")];
+            tensor<string, []> var_6520_equation_0 = const()[name = tensor<string, []>("op_6520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6520_cast_fp16 = einsum(equation = var_6520_equation_0, values = (var_6406_cast_fp16_16, var_6483_cast_fp16))[name = tensor<string, []>("op_6520_cast_fp16")];
+            tensor<string, []> var_6522_equation_0 = const()[name = tensor<string, []>("op_6522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6522_cast_fp16 = einsum(equation = var_6522_equation_0, values = (var_6406_cast_fp16_17, var_6484_cast_fp16))[name = tensor<string, []>("op_6522_cast_fp16")];
+            tensor<string, []> var_6524_equation_0 = const()[name = tensor<string, []>("op_6524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6524_cast_fp16 = einsum(equation = var_6524_equation_0, values = (var_6406_cast_fp16_18, var_6485_cast_fp16))[name = tensor<string, []>("op_6524_cast_fp16")];
+            tensor<string, []> var_6526_equation_0 = const()[name = tensor<string, []>("op_6526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6526_cast_fp16 = einsum(equation = var_6526_equation_0, values = (var_6406_cast_fp16_19, var_6486_cast_fp16))[name = tensor<string, []>("op_6526_cast_fp16")];
+            tensor<bool, []> input_235_interleave_0 = const()[name = tensor<string, []>("input_235_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_235_cast_fp16 = concat(axis = var_6311, interleave = input_235_interleave_0, values = (var_6488_cast_fp16, var_6490_cast_fp16, var_6492_cast_fp16, var_6494_cast_fp16, var_6496_cast_fp16, var_6498_cast_fp16, var_6500_cast_fp16, var_6502_cast_fp16, var_6504_cast_fp16, var_6506_cast_fp16, var_6508_cast_fp16, var_6510_cast_fp16, var_6512_cast_fp16, var_6514_cast_fp16, var_6516_cast_fp16, var_6518_cast_fp16, var_6520_cast_fp16, var_6522_cast_fp16, var_6524_cast_fp16, var_6526_cast_fp16))[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<string, []> var_6535_pad_type_0 = const()[name = tensor<string, []>("op_6535_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6535_strides_0 = const()[name = tensor<string, []>("op_6535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6535_pad_0 = const()[name = tensor<string, []>("op_6535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6535_dilations_0 = const()[name = tensor<string, []>("op_6535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6535_groups_0 = const()[name = tensor<string, []>("op_6535_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_23_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(929625472)))];
+            tensor<fp16, [1280]> blocks_23_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932902336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6535_cast_fp16 = conv(bias = blocks_23_attn_out_bias_to_fp16, dilations = var_6535_dilations_0, groups = var_6535_groups_0, pad = var_6535_pad_0, pad_type = var_6535_pad_type_0, strides = var_6535_strides_0, weight = blocks_23_attn_out_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("op_6535_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = var_6535_cast_fp16)[name = tensor<string, []>("inputs_95_cast_fp16")];
+            tensor<int32, [1]> input_237_axes_0 = const()[name = tensor<string, []>("input_237_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_237_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_237_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932904960)))];
+            tensor<fp16, [1280]> input_237_beta_0_to_fp16 = const()[name = tensor<string, []>("input_237_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932907584)))];
+            tensor<fp16, []> var_6545_to_fp16 = const()[name = tensor<string, []>("op_6545_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = input_237_beta_0_to_fp16, epsilon = var_6545_to_fp16, gamma = input_237_gamma_0_to_fp16, x = inputs_95_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_23_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(932910208)))];
+            tensor<fp16, [5120]> blocks_23_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(946017472)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_239_cast_fp16 = conv(bias = blocks_23_mlp_0_bias_to_fp16, dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = blocks_23_mlp_0_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<string, []> input_241_mode_0 = const()[name = tensor<string, []>("input_241_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_241_cast_fp16 = gelu(mode = input_241_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
+            tensor<string, []> var_6571_pad_type_0 = const()[name = tensor<string, []>("op_6571_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6571_strides_0 = const()[name = tensor<string, []>("op_6571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6571_pad_0 = const()[name = tensor<string, []>("op_6571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6571_dilations_0 = const()[name = tensor<string, []>("op_6571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6571_groups_0 = const()[name = tensor<string, []>("op_6571_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_23_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(946027776)))];
+            tensor<fp16, [1280]> blocks_23_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(959135040)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6571_cast_fp16 = conv(bias = blocks_23_mlp_2_bias_to_fp16, dilations = var_6571_dilations_0, groups = var_6571_groups_0, pad = var_6571_pad_0, pad_type = var_6571_pad_type_0, strides = var_6571_strides_0, weight = blocks_23_mlp_2_weight_to_fp16, x = input_241_cast_fp16)[name = tensor<string, []>("op_6571_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_6571_cast_fp16)[name = tensor<string, []>("inputs_97_cast_fp16")];
+            tensor<int32, []> var_6580 = const()[name = tensor<string, []>("op_6580"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_243_axes_0 = const()[name = tensor<string, []>("input_243_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_243_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_243_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(959137664)))];
+            tensor<fp16, [1280]> input_243_beta_0_to_fp16 = const()[name = tensor<string, []>("input_243_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(959140288)))];
+            tensor<fp16, []> var_6596_to_fp16 = const()[name = tensor<string, []>("op_6596_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_243_cast_fp16 = layer_norm(axes = input_243_axes_0, beta = input_243_beta_0_to_fp16, epsilon = var_6596_to_fp16, gamma = input_243_gamma_0_to_fp16, x = inputs_97_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
+            tensor<string, []> q_49_pad_type_0 = const()[name = tensor<string, []>("q_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_49_strides_0 = const()[name = tensor<string, []>("q_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_49_pad_0 = const()[name = tensor<string, []>("q_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_49_dilations_0 = const()[name = tensor<string, []>("q_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_49_groups_0 = const()[name = tensor<string, []>("q_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6631_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6631_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(959142912)))];
+            tensor<fp16, [1280]> var_6631_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6631_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962419776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6631_cast_fp16 = conv(bias = var_6631_bias_0_to_fp16, dilations = q_49_dilations_0, groups = q_49_groups_0, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = q_49_strides_0, weight = var_6631_weight_0_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6631_cast_fp16")];
+            tensor<string, []> k_49_pad_type_0 = const()[name = tensor<string, []>("k_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_49_strides_0 = const()[name = tensor<string, []>("k_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_49_pad_0 = const()[name = tensor<string, []>("k_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_49_dilations_0 = const()[name = tensor<string, []>("k_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_49_groups_0 = const()[name = tensor<string, []>("k_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(962422400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_49_cast_fp16 = conv(dilations = k_49_dilations_0, groups = k_49_groups_0, pad = k_49_pad_0, pad_type = k_49_pad_type_0, strides = k_49_strides_0, weight = blocks_24_attn_key_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("k_49_cast_fp16")];
+            tensor<string, []> var_6629_pad_type_0 = const()[name = tensor<string, []>("op_6629_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6629_strides_0 = const()[name = tensor<string, []>("op_6629_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6629_pad_0 = const()[name = tensor<string, []>("op_6629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6629_dilations_0 = const()[name = tensor<string, []>("op_6629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6629_groups_0 = const()[name = tensor<string, []>("op_6629_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(965699264)))];
+            tensor<fp16, [1280]> blocks_24_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968976128)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6629_cast_fp16 = conv(bias = blocks_24_attn_value_bias_to_fp16, dilations = var_6629_dilations_0, groups = var_6629_groups_0, pad = var_6629_pad_0, pad_type = var_6629_pad_type_0, strides = var_6629_strides_0, weight = blocks_24_attn_value_weight_to_fp16, x = input_243_cast_fp16)[name = tensor<string, []>("op_6629_cast_fp16")];
+            tensor<int32, [20]> tile_72 = const()[name = tensor<string, []>("tile_72"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6632_axis_0 = const()[name = tensor<string, []>("op_6632_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6632_cast_fp16_19 = split(axis = var_6632_axis_0, split_sizes = tile_72, x = var_6631_cast_fp16)[name = tensor<string, []>("op_6632_cast_fp16")];
+            tensor<int32, [4]> var_6653_perm_0 = const()[name = tensor<string, []>("op_6653_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_73 = const()[name = tensor<string, []>("tile_73"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6654_axis_0 = const()[name = tensor<string, []>("op_6654_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6653_cast_fp16 = transpose(perm = var_6653_perm_0, x = k_49_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6654_cast_fp16_19 = split(axis = var_6654_axis_0, split_sizes = tile_73, x = var_6653_cast_fp16)[name = tensor<string, []>("op_6654_cast_fp16")];
+            tensor<int32, [20]> tile_74 = const()[name = tensor<string, []>("tile_74"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6675_axis_0 = const()[name = tensor<string, []>("op_6675_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6675_cast_fp16_19 = split(axis = var_6675_axis_0, split_sizes = tile_74, x = var_6629_cast_fp16)[name = tensor<string, []>("op_6675_cast_fp16")];
+            tensor<string, []> aw_961_equation_0 = const()[name = tensor<string, []>("aw_961_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_961_cast_fp16 = einsum(equation = aw_961_equation_0, values = (var_6654_cast_fp16_0, var_6632_cast_fp16_0))[name = tensor<string, []>("aw_961_cast_fp16")];
+            tensor<string, []> aw_963_equation_0 = const()[name = tensor<string, []>("aw_963_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_963_cast_fp16 = einsum(equation = aw_963_equation_0, values = (var_6654_cast_fp16_1, var_6632_cast_fp16_1))[name = tensor<string, []>("aw_963_cast_fp16")];
+            tensor<string, []> aw_965_equation_0 = const()[name = tensor<string, []>("aw_965_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_965_cast_fp16 = einsum(equation = aw_965_equation_0, values = (var_6654_cast_fp16_2, var_6632_cast_fp16_2))[name = tensor<string, []>("aw_965_cast_fp16")];
+            tensor<string, []> aw_967_equation_0 = const()[name = tensor<string, []>("aw_967_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_967_cast_fp16 = einsum(equation = aw_967_equation_0, values = (var_6654_cast_fp16_3, var_6632_cast_fp16_3))[name = tensor<string, []>("aw_967_cast_fp16")];
+            tensor<string, []> aw_969_equation_0 = const()[name = tensor<string, []>("aw_969_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_969_cast_fp16 = einsum(equation = aw_969_equation_0, values = (var_6654_cast_fp16_4, var_6632_cast_fp16_4))[name = tensor<string, []>("aw_969_cast_fp16")];
+            tensor<string, []> aw_971_equation_0 = const()[name = tensor<string, []>("aw_971_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_971_cast_fp16 = einsum(equation = aw_971_equation_0, values = (var_6654_cast_fp16_5, var_6632_cast_fp16_5))[name = tensor<string, []>("aw_971_cast_fp16")];
+            tensor<string, []> aw_973_equation_0 = const()[name = tensor<string, []>("aw_973_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_973_cast_fp16 = einsum(equation = aw_973_equation_0, values = (var_6654_cast_fp16_6, var_6632_cast_fp16_6))[name = tensor<string, []>("aw_973_cast_fp16")];
+            tensor<string, []> aw_975_equation_0 = const()[name = tensor<string, []>("aw_975_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_975_cast_fp16 = einsum(equation = aw_975_equation_0, values = (var_6654_cast_fp16_7, var_6632_cast_fp16_7))[name = tensor<string, []>("aw_975_cast_fp16")];
+            tensor<string, []> aw_977_equation_0 = const()[name = tensor<string, []>("aw_977_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_977_cast_fp16 = einsum(equation = aw_977_equation_0, values = (var_6654_cast_fp16_8, var_6632_cast_fp16_8))[name = tensor<string, []>("aw_977_cast_fp16")];
+            tensor<string, []> aw_979_equation_0 = const()[name = tensor<string, []>("aw_979_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_979_cast_fp16 = einsum(equation = aw_979_equation_0, values = (var_6654_cast_fp16_9, var_6632_cast_fp16_9))[name = tensor<string, []>("aw_979_cast_fp16")];
+            tensor<string, []> aw_981_equation_0 = const()[name = tensor<string, []>("aw_981_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_981_cast_fp16 = einsum(equation = aw_981_equation_0, values = (var_6654_cast_fp16_10, var_6632_cast_fp16_10))[name = tensor<string, []>("aw_981_cast_fp16")];
+            tensor<string, []> aw_983_equation_0 = const()[name = tensor<string, []>("aw_983_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_983_cast_fp16 = einsum(equation = aw_983_equation_0, values = (var_6654_cast_fp16_11, var_6632_cast_fp16_11))[name = tensor<string, []>("aw_983_cast_fp16")];
+            tensor<string, []> aw_985_equation_0 = const()[name = tensor<string, []>("aw_985_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_985_cast_fp16 = einsum(equation = aw_985_equation_0, values = (var_6654_cast_fp16_12, var_6632_cast_fp16_12))[name = tensor<string, []>("aw_985_cast_fp16")];
+            tensor<string, []> aw_987_equation_0 = const()[name = tensor<string, []>("aw_987_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_987_cast_fp16 = einsum(equation = aw_987_equation_0, values = (var_6654_cast_fp16_13, var_6632_cast_fp16_13))[name = tensor<string, []>("aw_987_cast_fp16")];
+            tensor<string, []> aw_989_equation_0 = const()[name = tensor<string, []>("aw_989_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_989_cast_fp16 = einsum(equation = aw_989_equation_0, values = (var_6654_cast_fp16_14, var_6632_cast_fp16_14))[name = tensor<string, []>("aw_989_cast_fp16")];
+            tensor<string, []> aw_991_equation_0 = const()[name = tensor<string, []>("aw_991_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_991_cast_fp16 = einsum(equation = aw_991_equation_0, values = (var_6654_cast_fp16_15, var_6632_cast_fp16_15))[name = tensor<string, []>("aw_991_cast_fp16")];
+            tensor<string, []> aw_993_equation_0 = const()[name = tensor<string, []>("aw_993_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_993_cast_fp16 = einsum(equation = aw_993_equation_0, values = (var_6654_cast_fp16_16, var_6632_cast_fp16_16))[name = tensor<string, []>("aw_993_cast_fp16")];
+            tensor<string, []> aw_995_equation_0 = const()[name = tensor<string, []>("aw_995_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_995_cast_fp16 = einsum(equation = aw_995_equation_0, values = (var_6654_cast_fp16_17, var_6632_cast_fp16_17))[name = tensor<string, []>("aw_995_cast_fp16")];
+            tensor<string, []> aw_997_equation_0 = const()[name = tensor<string, []>("aw_997_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_997_cast_fp16 = einsum(equation = aw_997_equation_0, values = (var_6654_cast_fp16_18, var_6632_cast_fp16_18))[name = tensor<string, []>("aw_997_cast_fp16")];
+            tensor<string, []> aw_999_equation_0 = const()[name = tensor<string, []>("aw_999_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_999_cast_fp16 = einsum(equation = aw_999_equation_0, values = (var_6654_cast_fp16_19, var_6632_cast_fp16_19))[name = tensor<string, []>("aw_999_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6736_cast_fp16 = softmax(axis = var_6580, x = aw_961_cast_fp16)[name = tensor<string, []>("op_6736_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6737_cast_fp16 = softmax(axis = var_6580, x = aw_963_cast_fp16)[name = tensor<string, []>("op_6737_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6738_cast_fp16 = softmax(axis = var_6580, x = aw_965_cast_fp16)[name = tensor<string, []>("op_6738_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6739_cast_fp16 = softmax(axis = var_6580, x = aw_967_cast_fp16)[name = tensor<string, []>("op_6739_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6740_cast_fp16 = softmax(axis = var_6580, x = aw_969_cast_fp16)[name = tensor<string, []>("op_6740_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6741_cast_fp16 = softmax(axis = var_6580, x = aw_971_cast_fp16)[name = tensor<string, []>("op_6741_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6742_cast_fp16 = softmax(axis = var_6580, x = aw_973_cast_fp16)[name = tensor<string, []>("op_6742_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6743_cast_fp16 = softmax(axis = var_6580, x = aw_975_cast_fp16)[name = tensor<string, []>("op_6743_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6744_cast_fp16 = softmax(axis = var_6580, x = aw_977_cast_fp16)[name = tensor<string, []>("op_6744_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6745_cast_fp16 = softmax(axis = var_6580, x = aw_979_cast_fp16)[name = tensor<string, []>("op_6745_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6746_cast_fp16 = softmax(axis = var_6580, x = aw_981_cast_fp16)[name = tensor<string, []>("op_6746_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6747_cast_fp16 = softmax(axis = var_6580, x = aw_983_cast_fp16)[name = tensor<string, []>("op_6747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6748_cast_fp16 = softmax(axis = var_6580, x = aw_985_cast_fp16)[name = tensor<string, []>("op_6748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6749_cast_fp16 = softmax(axis = var_6580, x = aw_987_cast_fp16)[name = tensor<string, []>("op_6749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6750_cast_fp16 = softmax(axis = var_6580, x = aw_989_cast_fp16)[name = tensor<string, []>("op_6750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6751_cast_fp16 = softmax(axis = var_6580, x = aw_991_cast_fp16)[name = tensor<string, []>("op_6751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6752_cast_fp16 = softmax(axis = var_6580, x = aw_993_cast_fp16)[name = tensor<string, []>("op_6752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6753_cast_fp16 = softmax(axis = var_6580, x = aw_995_cast_fp16)[name = tensor<string, []>("op_6753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6754_cast_fp16 = softmax(axis = var_6580, x = aw_997_cast_fp16)[name = tensor<string, []>("op_6754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_6755_cast_fp16 = softmax(axis = var_6580, x = aw_999_cast_fp16)[name = tensor<string, []>("op_6755_cast_fp16")];
+            tensor<string, []> var_6757_equation_0 = const()[name = tensor<string, []>("op_6757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6675_cast_fp16_0, var_6736_cast_fp16))[name = tensor<string, []>("op_6757_cast_fp16")];
+            tensor<string, []> var_6759_equation_0 = const()[name = tensor<string, []>("op_6759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6759_cast_fp16 = einsum(equation = var_6759_equation_0, values = (var_6675_cast_fp16_1, var_6737_cast_fp16))[name = tensor<string, []>("op_6759_cast_fp16")];
+            tensor<string, []> var_6761_equation_0 = const()[name = tensor<string, []>("op_6761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6675_cast_fp16_2, var_6738_cast_fp16))[name = tensor<string, []>("op_6761_cast_fp16")];
+            tensor<string, []> var_6763_equation_0 = const()[name = tensor<string, []>("op_6763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6763_cast_fp16 = einsum(equation = var_6763_equation_0, values = (var_6675_cast_fp16_3, var_6739_cast_fp16))[name = tensor<string, []>("op_6763_cast_fp16")];
+            tensor<string, []> var_6765_equation_0 = const()[name = tensor<string, []>("op_6765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6675_cast_fp16_4, var_6740_cast_fp16))[name = tensor<string, []>("op_6765_cast_fp16")];
+            tensor<string, []> var_6767_equation_0 = const()[name = tensor<string, []>("op_6767_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6767_cast_fp16 = einsum(equation = var_6767_equation_0, values = (var_6675_cast_fp16_5, var_6741_cast_fp16))[name = tensor<string, []>("op_6767_cast_fp16")];
+            tensor<string, []> var_6769_equation_0 = const()[name = tensor<string, []>("op_6769_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6675_cast_fp16_6, var_6742_cast_fp16))[name = tensor<string, []>("op_6769_cast_fp16")];
+            tensor<string, []> var_6771_equation_0 = const()[name = tensor<string, []>("op_6771_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6771_cast_fp16 = einsum(equation = var_6771_equation_0, values = (var_6675_cast_fp16_7, var_6743_cast_fp16))[name = tensor<string, []>("op_6771_cast_fp16")];
+            tensor<string, []> var_6773_equation_0 = const()[name = tensor<string, []>("op_6773_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6675_cast_fp16_8, var_6744_cast_fp16))[name = tensor<string, []>("op_6773_cast_fp16")];
+            tensor<string, []> var_6775_equation_0 = const()[name = tensor<string, []>("op_6775_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6775_cast_fp16 = einsum(equation = var_6775_equation_0, values = (var_6675_cast_fp16_9, var_6745_cast_fp16))[name = tensor<string, []>("op_6775_cast_fp16")];
+            tensor<string, []> var_6777_equation_0 = const()[name = tensor<string, []>("op_6777_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6777_cast_fp16 = einsum(equation = var_6777_equation_0, values = (var_6675_cast_fp16_10, var_6746_cast_fp16))[name = tensor<string, []>("op_6777_cast_fp16")];
+            tensor<string, []> var_6779_equation_0 = const()[name = tensor<string, []>("op_6779_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6779_cast_fp16 = einsum(equation = var_6779_equation_0, values = (var_6675_cast_fp16_11, var_6747_cast_fp16))[name = tensor<string, []>("op_6779_cast_fp16")];
+            tensor<string, []> var_6781_equation_0 = const()[name = tensor<string, []>("op_6781_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6781_cast_fp16 = einsum(equation = var_6781_equation_0, values = (var_6675_cast_fp16_12, var_6748_cast_fp16))[name = tensor<string, []>("op_6781_cast_fp16")];
+            tensor<string, []> var_6783_equation_0 = const()[name = tensor<string, []>("op_6783_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6783_cast_fp16 = einsum(equation = var_6783_equation_0, values = (var_6675_cast_fp16_13, var_6749_cast_fp16))[name = tensor<string, []>("op_6783_cast_fp16")];
+            tensor<string, []> var_6785_equation_0 = const()[name = tensor<string, []>("op_6785_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6785_cast_fp16 = einsum(equation = var_6785_equation_0, values = (var_6675_cast_fp16_14, var_6750_cast_fp16))[name = tensor<string, []>("op_6785_cast_fp16")];
+            tensor<string, []> var_6787_equation_0 = const()[name = tensor<string, []>("op_6787_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6787_cast_fp16 = einsum(equation = var_6787_equation_0, values = (var_6675_cast_fp16_15, var_6751_cast_fp16))[name = tensor<string, []>("op_6787_cast_fp16")];
+            tensor<string, []> var_6789_equation_0 = const()[name = tensor<string, []>("op_6789_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6789_cast_fp16 = einsum(equation = var_6789_equation_0, values = (var_6675_cast_fp16_16, var_6752_cast_fp16))[name = tensor<string, []>("op_6789_cast_fp16")];
+            tensor<string, []> var_6791_equation_0 = const()[name = tensor<string, []>("op_6791_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6791_cast_fp16 = einsum(equation = var_6791_equation_0, values = (var_6675_cast_fp16_17, var_6753_cast_fp16))[name = tensor<string, []>("op_6791_cast_fp16")];
+            tensor<string, []> var_6793_equation_0 = const()[name = tensor<string, []>("op_6793_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6793_cast_fp16 = einsum(equation = var_6793_equation_0, values = (var_6675_cast_fp16_18, var_6754_cast_fp16))[name = tensor<string, []>("op_6793_cast_fp16")];
+            tensor<string, []> var_6795_equation_0 = const()[name = tensor<string, []>("op_6795_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_6795_cast_fp16 = einsum(equation = var_6795_equation_0, values = (var_6675_cast_fp16_19, var_6755_cast_fp16))[name = tensor<string, []>("op_6795_cast_fp16")];
+            tensor<bool, []> input_245_interleave_0 = const()[name = tensor<string, []>("input_245_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_245_cast_fp16 = concat(axis = var_6580, interleave = input_245_interleave_0, values = (var_6757_cast_fp16, var_6759_cast_fp16, var_6761_cast_fp16, var_6763_cast_fp16, var_6765_cast_fp16, var_6767_cast_fp16, var_6769_cast_fp16, var_6771_cast_fp16, var_6773_cast_fp16, var_6775_cast_fp16, var_6777_cast_fp16, var_6779_cast_fp16, var_6781_cast_fp16, var_6783_cast_fp16, var_6785_cast_fp16, var_6787_cast_fp16, var_6789_cast_fp16, var_6791_cast_fp16, var_6793_cast_fp16, var_6795_cast_fp16))[name = tensor<string, []>("input_245_cast_fp16")];
+            tensor<string, []> var_6804_pad_type_0 = const()[name = tensor<string, []>("op_6804_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6804_strides_0 = const()[name = tensor<string, []>("op_6804_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6804_pad_0 = const()[name = tensor<string, []>("op_6804_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6804_dilations_0 = const()[name = tensor<string, []>("op_6804_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6804_groups_0 = const()[name = tensor<string, []>("op_6804_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_24_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(968978752)))];
+            tensor<fp16, [1280]> blocks_24_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(972255616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6804_cast_fp16 = conv(bias = blocks_24_attn_out_bias_to_fp16, dilations = var_6804_dilations_0, groups = var_6804_groups_0, pad = var_6804_pad_0, pad_type = var_6804_pad_type_0, strides = var_6804_strides_0, weight = blocks_24_attn_out_weight_to_fp16, x = input_245_cast_fp16)[name = tensor<string, []>("op_6804_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = var_6804_cast_fp16)[name = tensor<string, []>("inputs_99_cast_fp16")];
+            tensor<int32, [1]> input_247_axes_0 = const()[name = tensor<string, []>("input_247_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_247_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_247_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(972258240)))];
+            tensor<fp16, [1280]> input_247_beta_0_to_fp16 = const()[name = tensor<string, []>("input_247_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(972260864)))];
+            tensor<fp16, []> var_6814_to_fp16 = const()[name = tensor<string, []>("op_6814_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_247_cast_fp16 = layer_norm(axes = input_247_axes_0, beta = input_247_beta_0_to_fp16, epsilon = var_6814_to_fp16, gamma = input_247_gamma_0_to_fp16, x = inputs_99_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
+            tensor<string, []> input_249_pad_type_0 = const()[name = tensor<string, []>("input_249_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_249_strides_0 = const()[name = tensor<string, []>("input_249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_249_pad_0 = const()[name = tensor<string, []>("input_249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_249_dilations_0 = const()[name = tensor<string, []>("input_249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_249_groups_0 = const()[name = tensor<string, []>("input_249_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_24_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(972263488)))];
+            tensor<fp16, [5120]> blocks_24_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985370752)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_249_cast_fp16 = conv(bias = blocks_24_mlp_0_bias_to_fp16, dilations = input_249_dilations_0, groups = input_249_groups_0, pad = input_249_pad_0, pad_type = input_249_pad_type_0, strides = input_249_strides_0, weight = blocks_24_mlp_0_weight_to_fp16, x = input_247_cast_fp16)[name = tensor<string, []>("input_249_cast_fp16")];
+            tensor<string, []> input_251_mode_0 = const()[name = tensor<string, []>("input_251_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_251_cast_fp16 = gelu(mode = input_251_mode_0, x = input_249_cast_fp16)[name = tensor<string, []>("input_251_cast_fp16")];
+            tensor<string, []> var_6840_pad_type_0 = const()[name = tensor<string, []>("op_6840_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6840_strides_0 = const()[name = tensor<string, []>("op_6840_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6840_pad_0 = const()[name = tensor<string, []>("op_6840_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6840_dilations_0 = const()[name = tensor<string, []>("op_6840_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6840_groups_0 = const()[name = tensor<string, []>("op_6840_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_24_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(985381056)))];
+            tensor<fp16, [1280]> blocks_24_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_24_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998488320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6840_cast_fp16 = conv(bias = blocks_24_mlp_2_bias_to_fp16, dilations = var_6840_dilations_0, groups = var_6840_groups_0, pad = var_6840_pad_0, pad_type = var_6840_pad_type_0, strides = var_6840_strides_0, weight = blocks_24_mlp_2_weight_to_fp16, x = input_251_cast_fp16)[name = tensor<string, []>("op_6840_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = var_6840_cast_fp16)[name = tensor<string, []>("inputs_101_cast_fp16")];
+            tensor<int32, []> var_6849 = const()[name = tensor<string, []>("op_6849"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_253_axes_0 = const()[name = tensor<string, []>("input_253_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_253_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_253_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998490944)))];
+            tensor<fp16, [1280]> input_253_beta_0_to_fp16 = const()[name = tensor<string, []>("input_253_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998493568)))];
+            tensor<fp16, []> var_6865_to_fp16 = const()[name = tensor<string, []>("op_6865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_253_cast_fp16 = layer_norm(axes = input_253_axes_0, beta = input_253_beta_0_to_fp16, epsilon = var_6865_to_fp16, gamma = input_253_gamma_0_to_fp16, x = inputs_101_cast_fp16)[name = tensor<string, []>("input_253_cast_fp16")];
+            tensor<string, []> q_51_pad_type_0 = const()[name = tensor<string, []>("q_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_51_strides_0 = const()[name = tensor<string, []>("q_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_51_pad_0 = const()[name = tensor<string, []>("q_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_51_dilations_0 = const()[name = tensor<string, []>("q_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_51_groups_0 = const()[name = tensor<string, []>("q_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_6900_weight_0_to_fp16 = const()[name = tensor<string, []>("op_6900_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(998496192)))];
+            tensor<fp16, [1280]> var_6900_bias_0_to_fp16 = const()[name = tensor<string, []>("op_6900_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001773056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6900_cast_fp16 = conv(bias = var_6900_bias_0_to_fp16, dilations = q_51_dilations_0, groups = q_51_groups_0, pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = q_51_strides_0, weight = var_6900_weight_0_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6900_cast_fp16")];
+            tensor<string, []> k_51_pad_type_0 = const()[name = tensor<string, []>("k_51_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_51_strides_0 = const()[name = tensor<string, []>("k_51_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_51_pad_0 = const()[name = tensor<string, []>("k_51_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_51_dilations_0 = const()[name = tensor<string, []>("k_51_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_51_groups_0 = const()[name = tensor<string, []>("k_51_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1001775680)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_51_cast_fp16 = conv(dilations = k_51_dilations_0, groups = k_51_groups_0, pad = k_51_pad_0, pad_type = k_51_pad_type_0, strides = k_51_strides_0, weight = blocks_25_attn_key_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("k_51_cast_fp16")];
+            tensor<string, []> var_6898_pad_type_0 = const()[name = tensor<string, []>("op_6898_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_6898_strides_0 = const()[name = tensor<string, []>("op_6898_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_6898_pad_0 = const()[name = tensor<string, []>("op_6898_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_6898_dilations_0 = const()[name = tensor<string, []>("op_6898_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_6898_groups_0 = const()[name = tensor<string, []>("op_6898_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1005052544)))];
+            tensor<fp16, [1280]> blocks_25_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1008329408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_6898_cast_fp16 = conv(bias = blocks_25_attn_value_bias_to_fp16, dilations = var_6898_dilations_0, groups = var_6898_groups_0, pad = var_6898_pad_0, pad_type = var_6898_pad_type_0, strides = var_6898_strides_0, weight = blocks_25_attn_value_weight_to_fp16, x = input_253_cast_fp16)[name = tensor<string, []>("op_6898_cast_fp16")];
+            tensor<int32, [20]> tile_75 = const()[name = tensor<string, []>("tile_75"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6901_axis_0 = const()[name = tensor<string, []>("op_6901_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6901_cast_fp16_19 = split(axis = var_6901_axis_0, split_sizes = tile_75, x = var_6900_cast_fp16)[name = tensor<string, []>("op_6901_cast_fp16")];
+            tensor<int32, [4]> var_6922_perm_0 = const()[name = tensor<string, []>("op_6922_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_76 = const()[name = tensor<string, []>("tile_76"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6923_axis_0 = const()[name = tensor<string, []>("op_6923_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_6922_cast_fp16 = transpose(perm = var_6922_perm_0, x = k_51_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_6923_cast_fp16_19 = split(axis = var_6923_axis_0, split_sizes = tile_76, x = var_6922_cast_fp16)[name = tensor<string, []>("op_6923_cast_fp16")];
+            tensor<int32, [20]> tile_77 = const()[name = tensor<string, []>("tile_77"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_6944_axis_0 = const()[name = tensor<string, []>("op_6944_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_6944_cast_fp16_19 = split(axis = var_6944_axis_0, split_sizes = tile_77, x = var_6898_cast_fp16)[name = tensor<string, []>("op_6944_cast_fp16")];
+            tensor<string, []> aw_1001_equation_0 = const()[name = tensor<string, []>("aw_1001_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1001_cast_fp16 = einsum(equation = aw_1001_equation_0, values = (var_6923_cast_fp16_0, var_6901_cast_fp16_0))[name = tensor<string, []>("aw_1001_cast_fp16")];
+            tensor<string, []> aw_1003_equation_0 = const()[name = tensor<string, []>("aw_1003_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1003_cast_fp16 = einsum(equation = aw_1003_equation_0, values = (var_6923_cast_fp16_1, var_6901_cast_fp16_1))[name = tensor<string, []>("aw_1003_cast_fp16")];
+            tensor<string, []> aw_1005_equation_0 = const()[name = tensor<string, []>("aw_1005_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1005_cast_fp16 = einsum(equation = aw_1005_equation_0, values = (var_6923_cast_fp16_2, var_6901_cast_fp16_2))[name = tensor<string, []>("aw_1005_cast_fp16")];
+            tensor<string, []> aw_1007_equation_0 = const()[name = tensor<string, []>("aw_1007_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1007_cast_fp16 = einsum(equation = aw_1007_equation_0, values = (var_6923_cast_fp16_3, var_6901_cast_fp16_3))[name = tensor<string, []>("aw_1007_cast_fp16")];
+            tensor<string, []> aw_1009_equation_0 = const()[name = tensor<string, []>("aw_1009_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1009_cast_fp16 = einsum(equation = aw_1009_equation_0, values = (var_6923_cast_fp16_4, var_6901_cast_fp16_4))[name = tensor<string, []>("aw_1009_cast_fp16")];
+            tensor<string, []> aw_1011_equation_0 = const()[name = tensor<string, []>("aw_1011_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1011_cast_fp16 = einsum(equation = aw_1011_equation_0, values = (var_6923_cast_fp16_5, var_6901_cast_fp16_5))[name = tensor<string, []>("aw_1011_cast_fp16")];
+            tensor<string, []> aw_1013_equation_0 = const()[name = tensor<string, []>("aw_1013_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1013_cast_fp16 = einsum(equation = aw_1013_equation_0, values = (var_6923_cast_fp16_6, var_6901_cast_fp16_6))[name = tensor<string, []>("aw_1013_cast_fp16")];
+            tensor<string, []> aw_1015_equation_0 = const()[name = tensor<string, []>("aw_1015_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1015_cast_fp16 = einsum(equation = aw_1015_equation_0, values = (var_6923_cast_fp16_7, var_6901_cast_fp16_7))[name = tensor<string, []>("aw_1015_cast_fp16")];
+            tensor<string, []> aw_1017_equation_0 = const()[name = tensor<string, []>("aw_1017_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1017_cast_fp16 = einsum(equation = aw_1017_equation_0, values = (var_6923_cast_fp16_8, var_6901_cast_fp16_8))[name = tensor<string, []>("aw_1017_cast_fp16")];
+            tensor<string, []> aw_1019_equation_0 = const()[name = tensor<string, []>("aw_1019_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1019_cast_fp16 = einsum(equation = aw_1019_equation_0, values = (var_6923_cast_fp16_9, var_6901_cast_fp16_9))[name = tensor<string, []>("aw_1019_cast_fp16")];
+            tensor<string, []> aw_1021_equation_0 = const()[name = tensor<string, []>("aw_1021_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1021_cast_fp16 = einsum(equation = aw_1021_equation_0, values = (var_6923_cast_fp16_10, var_6901_cast_fp16_10))[name = tensor<string, []>("aw_1021_cast_fp16")];
+            tensor<string, []> aw_1023_equation_0 = const()[name = tensor<string, []>("aw_1023_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1023_cast_fp16 = einsum(equation = aw_1023_equation_0, values = (var_6923_cast_fp16_11, var_6901_cast_fp16_11))[name = tensor<string, []>("aw_1023_cast_fp16")];
+            tensor<string, []> aw_1025_equation_0 = const()[name = tensor<string, []>("aw_1025_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1025_cast_fp16 = einsum(equation = aw_1025_equation_0, values = (var_6923_cast_fp16_12, var_6901_cast_fp16_12))[name = tensor<string, []>("aw_1025_cast_fp16")];
+            tensor<string, []> aw_1027_equation_0 = const()[name = tensor<string, []>("aw_1027_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1027_cast_fp16 = einsum(equation = aw_1027_equation_0, values = (var_6923_cast_fp16_13, var_6901_cast_fp16_13))[name = tensor<string, []>("aw_1027_cast_fp16")];
+            tensor<string, []> aw_1029_equation_0 = const()[name = tensor<string, []>("aw_1029_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1029_cast_fp16 = einsum(equation = aw_1029_equation_0, values = (var_6923_cast_fp16_14, var_6901_cast_fp16_14))[name = tensor<string, []>("aw_1029_cast_fp16")];
+            tensor<string, []> aw_1031_equation_0 = const()[name = tensor<string, []>("aw_1031_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1031_cast_fp16 = einsum(equation = aw_1031_equation_0, values = (var_6923_cast_fp16_15, var_6901_cast_fp16_15))[name = tensor<string, []>("aw_1031_cast_fp16")];
+            tensor<string, []> aw_1033_equation_0 = const()[name = tensor<string, []>("aw_1033_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1033_cast_fp16 = einsum(equation = aw_1033_equation_0, values = (var_6923_cast_fp16_16, var_6901_cast_fp16_16))[name = tensor<string, []>("aw_1033_cast_fp16")];
+            tensor<string, []> aw_1035_equation_0 = const()[name = tensor<string, []>("aw_1035_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1035_cast_fp16 = einsum(equation = aw_1035_equation_0, values = (var_6923_cast_fp16_17, var_6901_cast_fp16_17))[name = tensor<string, []>("aw_1035_cast_fp16")];
+            tensor<string, []> aw_1037_equation_0 = const()[name = tensor<string, []>("aw_1037_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1037_cast_fp16 = einsum(equation = aw_1037_equation_0, values = (var_6923_cast_fp16_18, var_6901_cast_fp16_18))[name = tensor<string, []>("aw_1037_cast_fp16")];
+            tensor<string, []> aw_1039_equation_0 = const()[name = tensor<string, []>("aw_1039_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1039_cast_fp16 = einsum(equation = aw_1039_equation_0, values = (var_6923_cast_fp16_19, var_6901_cast_fp16_19))[name = tensor<string, []>("aw_1039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7005_cast_fp16 = softmax(axis = var_6849, x = aw_1001_cast_fp16)[name = tensor<string, []>("op_7005_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7006_cast_fp16 = softmax(axis = var_6849, x = aw_1003_cast_fp16)[name = tensor<string, []>("op_7006_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7007_cast_fp16 = softmax(axis = var_6849, x = aw_1005_cast_fp16)[name = tensor<string, []>("op_7007_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7008_cast_fp16 = softmax(axis = var_6849, x = aw_1007_cast_fp16)[name = tensor<string, []>("op_7008_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7009_cast_fp16 = softmax(axis = var_6849, x = aw_1009_cast_fp16)[name = tensor<string, []>("op_7009_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7010_cast_fp16 = softmax(axis = var_6849, x = aw_1011_cast_fp16)[name = tensor<string, []>("op_7010_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7011_cast_fp16 = softmax(axis = var_6849, x = aw_1013_cast_fp16)[name = tensor<string, []>("op_7011_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7012_cast_fp16 = softmax(axis = var_6849, x = aw_1015_cast_fp16)[name = tensor<string, []>("op_7012_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7013_cast_fp16 = softmax(axis = var_6849, x = aw_1017_cast_fp16)[name = tensor<string, []>("op_7013_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7014_cast_fp16 = softmax(axis = var_6849, x = aw_1019_cast_fp16)[name = tensor<string, []>("op_7014_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7015_cast_fp16 = softmax(axis = var_6849, x = aw_1021_cast_fp16)[name = tensor<string, []>("op_7015_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7016_cast_fp16 = softmax(axis = var_6849, x = aw_1023_cast_fp16)[name = tensor<string, []>("op_7016_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7017_cast_fp16 = softmax(axis = var_6849, x = aw_1025_cast_fp16)[name = tensor<string, []>("op_7017_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7018_cast_fp16 = softmax(axis = var_6849, x = aw_1027_cast_fp16)[name = tensor<string, []>("op_7018_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7019_cast_fp16 = softmax(axis = var_6849, x = aw_1029_cast_fp16)[name = tensor<string, []>("op_7019_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7020_cast_fp16 = softmax(axis = var_6849, x = aw_1031_cast_fp16)[name = tensor<string, []>("op_7020_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7021_cast_fp16 = softmax(axis = var_6849, x = aw_1033_cast_fp16)[name = tensor<string, []>("op_7021_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7022_cast_fp16 = softmax(axis = var_6849, x = aw_1035_cast_fp16)[name = tensor<string, []>("op_7022_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7023_cast_fp16 = softmax(axis = var_6849, x = aw_1037_cast_fp16)[name = tensor<string, []>("op_7023_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7024_cast_fp16 = softmax(axis = var_6849, x = aw_1039_cast_fp16)[name = tensor<string, []>("op_7024_cast_fp16")];
+            tensor<string, []> var_7026_equation_0 = const()[name = tensor<string, []>("op_7026_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7026_cast_fp16 = einsum(equation = var_7026_equation_0, values = (var_6944_cast_fp16_0, var_7005_cast_fp16))[name = tensor<string, []>("op_7026_cast_fp16")];
+            tensor<string, []> var_7028_equation_0 = const()[name = tensor<string, []>("op_7028_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7028_cast_fp16 = einsum(equation = var_7028_equation_0, values = (var_6944_cast_fp16_1, var_7006_cast_fp16))[name = tensor<string, []>("op_7028_cast_fp16")];
+            tensor<string, []> var_7030_equation_0 = const()[name = tensor<string, []>("op_7030_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7030_cast_fp16 = einsum(equation = var_7030_equation_0, values = (var_6944_cast_fp16_2, var_7007_cast_fp16))[name = tensor<string, []>("op_7030_cast_fp16")];
+            tensor<string, []> var_7032_equation_0 = const()[name = tensor<string, []>("op_7032_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7032_cast_fp16 = einsum(equation = var_7032_equation_0, values = (var_6944_cast_fp16_3, var_7008_cast_fp16))[name = tensor<string, []>("op_7032_cast_fp16")];
+            tensor<string, []> var_7034_equation_0 = const()[name = tensor<string, []>("op_7034_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7034_cast_fp16 = einsum(equation = var_7034_equation_0, values = (var_6944_cast_fp16_4, var_7009_cast_fp16))[name = tensor<string, []>("op_7034_cast_fp16")];
+            tensor<string, []> var_7036_equation_0 = const()[name = tensor<string, []>("op_7036_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7036_cast_fp16 = einsum(equation = var_7036_equation_0, values = (var_6944_cast_fp16_5, var_7010_cast_fp16))[name = tensor<string, []>("op_7036_cast_fp16")];
+            tensor<string, []> var_7038_equation_0 = const()[name = tensor<string, []>("op_7038_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7038_cast_fp16 = einsum(equation = var_7038_equation_0, values = (var_6944_cast_fp16_6, var_7011_cast_fp16))[name = tensor<string, []>("op_7038_cast_fp16")];
+            tensor<string, []> var_7040_equation_0 = const()[name = tensor<string, []>("op_7040_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7040_cast_fp16 = einsum(equation = var_7040_equation_0, values = (var_6944_cast_fp16_7, var_7012_cast_fp16))[name = tensor<string, []>("op_7040_cast_fp16")];
+            tensor<string, []> var_7042_equation_0 = const()[name = tensor<string, []>("op_7042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7042_cast_fp16 = einsum(equation = var_7042_equation_0, values = (var_6944_cast_fp16_8, var_7013_cast_fp16))[name = tensor<string, []>("op_7042_cast_fp16")];
+            tensor<string, []> var_7044_equation_0 = const()[name = tensor<string, []>("op_7044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7044_cast_fp16 = einsum(equation = var_7044_equation_0, values = (var_6944_cast_fp16_9, var_7014_cast_fp16))[name = tensor<string, []>("op_7044_cast_fp16")];
+            tensor<string, []> var_7046_equation_0 = const()[name = tensor<string, []>("op_7046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7046_cast_fp16 = einsum(equation = var_7046_equation_0, values = (var_6944_cast_fp16_10, var_7015_cast_fp16))[name = tensor<string, []>("op_7046_cast_fp16")];
+            tensor<string, []> var_7048_equation_0 = const()[name = tensor<string, []>("op_7048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7048_cast_fp16 = einsum(equation = var_7048_equation_0, values = (var_6944_cast_fp16_11, var_7016_cast_fp16))[name = tensor<string, []>("op_7048_cast_fp16")];
+            tensor<string, []> var_7050_equation_0 = const()[name = tensor<string, []>("op_7050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7050_cast_fp16 = einsum(equation = var_7050_equation_0, values = (var_6944_cast_fp16_12, var_7017_cast_fp16))[name = tensor<string, []>("op_7050_cast_fp16")];
+            tensor<string, []> var_7052_equation_0 = const()[name = tensor<string, []>("op_7052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7052_cast_fp16 = einsum(equation = var_7052_equation_0, values = (var_6944_cast_fp16_13, var_7018_cast_fp16))[name = tensor<string, []>("op_7052_cast_fp16")];
+            tensor<string, []> var_7054_equation_0 = const()[name = tensor<string, []>("op_7054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7054_cast_fp16 = einsum(equation = var_7054_equation_0, values = (var_6944_cast_fp16_14, var_7019_cast_fp16))[name = tensor<string, []>("op_7054_cast_fp16")];
+            tensor<string, []> var_7056_equation_0 = const()[name = tensor<string, []>("op_7056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7056_cast_fp16 = einsum(equation = var_7056_equation_0, values = (var_6944_cast_fp16_15, var_7020_cast_fp16))[name = tensor<string, []>("op_7056_cast_fp16")];
+            tensor<string, []> var_7058_equation_0 = const()[name = tensor<string, []>("op_7058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7058_cast_fp16 = einsum(equation = var_7058_equation_0, values = (var_6944_cast_fp16_16, var_7021_cast_fp16))[name = tensor<string, []>("op_7058_cast_fp16")];
+            tensor<string, []> var_7060_equation_0 = const()[name = tensor<string, []>("op_7060_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7060_cast_fp16 = einsum(equation = var_7060_equation_0, values = (var_6944_cast_fp16_17, var_7022_cast_fp16))[name = tensor<string, []>("op_7060_cast_fp16")];
+            tensor<string, []> var_7062_equation_0 = const()[name = tensor<string, []>("op_7062_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7062_cast_fp16 = einsum(equation = var_7062_equation_0, values = (var_6944_cast_fp16_18, var_7023_cast_fp16))[name = tensor<string, []>("op_7062_cast_fp16")];
+            tensor<string, []> var_7064_equation_0 = const()[name = tensor<string, []>("op_7064_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7064_cast_fp16 = einsum(equation = var_7064_equation_0, values = (var_6944_cast_fp16_19, var_7024_cast_fp16))[name = tensor<string, []>("op_7064_cast_fp16")];
+            tensor<bool, []> input_255_interleave_0 = const()[name = tensor<string, []>("input_255_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_255_cast_fp16 = concat(axis = var_6849, interleave = input_255_interleave_0, values = (var_7026_cast_fp16, var_7028_cast_fp16, var_7030_cast_fp16, var_7032_cast_fp16, var_7034_cast_fp16, var_7036_cast_fp16, var_7038_cast_fp16, var_7040_cast_fp16, var_7042_cast_fp16, var_7044_cast_fp16, var_7046_cast_fp16, var_7048_cast_fp16, var_7050_cast_fp16, var_7052_cast_fp16, var_7054_cast_fp16, var_7056_cast_fp16, var_7058_cast_fp16, var_7060_cast_fp16, var_7062_cast_fp16, var_7064_cast_fp16))[name = tensor<string, []>("input_255_cast_fp16")];
+            tensor<string, []> var_7073_pad_type_0 = const()[name = tensor<string, []>("op_7073_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7073_strides_0 = const()[name = tensor<string, []>("op_7073_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7073_pad_0 = const()[name = tensor<string, []>("op_7073_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7073_dilations_0 = const()[name = tensor<string, []>("op_7073_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7073_groups_0 = const()[name = tensor<string, []>("op_7073_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_25_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1008332032)))];
+            tensor<fp16, [1280]> blocks_25_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011608896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7073_cast_fp16 = conv(bias = blocks_25_attn_out_bias_to_fp16, dilations = var_7073_dilations_0, groups = var_7073_groups_0, pad = var_7073_pad_0, pad_type = var_7073_pad_type_0, strides = var_7073_strides_0, weight = blocks_25_attn_out_weight_to_fp16, x = input_255_cast_fp16)[name = tensor<string, []>("op_7073_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = var_7073_cast_fp16)[name = tensor<string, []>("inputs_103_cast_fp16")];
+            tensor<int32, [1]> input_257_axes_0 = const()[name = tensor<string, []>("input_257_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_257_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_257_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011611520)))];
+            tensor<fp16, [1280]> input_257_beta_0_to_fp16 = const()[name = tensor<string, []>("input_257_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011614144)))];
+            tensor<fp16, []> var_7083_to_fp16 = const()[name = tensor<string, []>("op_7083_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_257_cast_fp16 = layer_norm(axes = input_257_axes_0, beta = input_257_beta_0_to_fp16, epsilon = var_7083_to_fp16, gamma = input_257_gamma_0_to_fp16, x = inputs_103_cast_fp16)[name = tensor<string, []>("input_257_cast_fp16")];
+            tensor<string, []> input_259_pad_type_0 = const()[name = tensor<string, []>("input_259_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_259_strides_0 = const()[name = tensor<string, []>("input_259_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_259_pad_0 = const()[name = tensor<string, []>("input_259_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_259_dilations_0 = const()[name = tensor<string, []>("input_259_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_259_groups_0 = const()[name = tensor<string, []>("input_259_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_25_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1011616768)))];
+            tensor<fp16, [5120]> blocks_25_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024724032)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_259_cast_fp16 = conv(bias = blocks_25_mlp_0_bias_to_fp16, dilations = input_259_dilations_0, groups = input_259_groups_0, pad = input_259_pad_0, pad_type = input_259_pad_type_0, strides = input_259_strides_0, weight = blocks_25_mlp_0_weight_to_fp16, x = input_257_cast_fp16)[name = tensor<string, []>("input_259_cast_fp16")];
+            tensor<string, []> input_261_mode_0 = const()[name = tensor<string, []>("input_261_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_261_cast_fp16 = gelu(mode = input_261_mode_0, x = input_259_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
+            tensor<string, []> var_7109_pad_type_0 = const()[name = tensor<string, []>("op_7109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7109_strides_0 = const()[name = tensor<string, []>("op_7109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7109_pad_0 = const()[name = tensor<string, []>("op_7109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7109_dilations_0 = const()[name = tensor<string, []>("op_7109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7109_groups_0 = const()[name = tensor<string, []>("op_7109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_25_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1024734336)))];
+            tensor<fp16, [1280]> blocks_25_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_25_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037841600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7109_cast_fp16 = conv(bias = blocks_25_mlp_2_bias_to_fp16, dilations = var_7109_dilations_0, groups = var_7109_groups_0, pad = var_7109_pad_0, pad_type = var_7109_pad_type_0, strides = var_7109_strides_0, weight = blocks_25_mlp_2_weight_to_fp16, x = input_261_cast_fp16)[name = tensor<string, []>("op_7109_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = var_7109_cast_fp16)[name = tensor<string, []>("inputs_105_cast_fp16")];
+            tensor<int32, []> var_7118 = const()[name = tensor<string, []>("op_7118"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_263_axes_0 = const()[name = tensor<string, []>("input_263_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_263_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_263_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037844224)))];
+            tensor<fp16, [1280]> input_263_beta_0_to_fp16 = const()[name = tensor<string, []>("input_263_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037846848)))];
+            tensor<fp16, []> var_7134_to_fp16 = const()[name = tensor<string, []>("op_7134_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_263_cast_fp16 = layer_norm(axes = input_263_axes_0, beta = input_263_beta_0_to_fp16, epsilon = var_7134_to_fp16, gamma = input_263_gamma_0_to_fp16, x = inputs_105_cast_fp16)[name = tensor<string, []>("input_263_cast_fp16")];
+            tensor<string, []> q_53_pad_type_0 = const()[name = tensor<string, []>("q_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_53_strides_0 = const()[name = tensor<string, []>("q_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_53_pad_0 = const()[name = tensor<string, []>("q_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_53_dilations_0 = const()[name = tensor<string, []>("q_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_53_groups_0 = const()[name = tensor<string, []>("q_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7169_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7169_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1037849472)))];
+            tensor<fp16, [1280]> var_7169_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7169_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1041126336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7169_cast_fp16 = conv(bias = var_7169_bias_0_to_fp16, dilations = q_53_dilations_0, groups = q_53_groups_0, pad = q_53_pad_0, pad_type = q_53_pad_type_0, strides = q_53_strides_0, weight = var_7169_weight_0_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7169_cast_fp16")];
+            tensor<string, []> k_53_pad_type_0 = const()[name = tensor<string, []>("k_53_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_53_strides_0 = const()[name = tensor<string, []>("k_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_53_pad_0 = const()[name = tensor<string, []>("k_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_53_dilations_0 = const()[name = tensor<string, []>("k_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_53_groups_0 = const()[name = tensor<string, []>("k_53_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1041128960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_53_cast_fp16 = conv(dilations = k_53_dilations_0, groups = k_53_groups_0, pad = k_53_pad_0, pad_type = k_53_pad_type_0, strides = k_53_strides_0, weight = blocks_26_attn_key_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("k_53_cast_fp16")];
+            tensor<string, []> var_7167_pad_type_0 = const()[name = tensor<string, []>("op_7167_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7167_strides_0 = const()[name = tensor<string, []>("op_7167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7167_pad_0 = const()[name = tensor<string, []>("op_7167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7167_dilations_0 = const()[name = tensor<string, []>("op_7167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7167_groups_0 = const()[name = tensor<string, []>("op_7167_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1044405824)))];
+            tensor<fp16, [1280]> blocks_26_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047682688)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7167_cast_fp16 = conv(bias = blocks_26_attn_value_bias_to_fp16, dilations = var_7167_dilations_0, groups = var_7167_groups_0, pad = var_7167_pad_0, pad_type = var_7167_pad_type_0, strides = var_7167_strides_0, weight = blocks_26_attn_value_weight_to_fp16, x = input_263_cast_fp16)[name = tensor<string, []>("op_7167_cast_fp16")];
+            tensor<int32, [20]> tile_78 = const()[name = tensor<string, []>("tile_78"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7170_axis_0 = const()[name = tensor<string, []>("op_7170_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7170_cast_fp16_19 = split(axis = var_7170_axis_0, split_sizes = tile_78, x = var_7169_cast_fp16)[name = tensor<string, []>("op_7170_cast_fp16")];
+            tensor<int32, [4]> var_7191_perm_0 = const()[name = tensor<string, []>("op_7191_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_79 = const()[name = tensor<string, []>("tile_79"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7192_axis_0 = const()[name = tensor<string, []>("op_7192_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7191_cast_fp16 = transpose(perm = var_7191_perm_0, x = k_53_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7192_cast_fp16_19 = split(axis = var_7192_axis_0, split_sizes = tile_79, x = var_7191_cast_fp16)[name = tensor<string, []>("op_7192_cast_fp16")];
+            tensor<int32, [20]> tile_80 = const()[name = tensor<string, []>("tile_80"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7213_axis_0 = const()[name = tensor<string, []>("op_7213_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7213_cast_fp16_19 = split(axis = var_7213_axis_0, split_sizes = tile_80, x = var_7167_cast_fp16)[name = tensor<string, []>("op_7213_cast_fp16")];
+            tensor<string, []> aw_1041_equation_0 = const()[name = tensor<string, []>("aw_1041_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1041_cast_fp16 = einsum(equation = aw_1041_equation_0, values = (var_7192_cast_fp16_0, var_7170_cast_fp16_0))[name = tensor<string, []>("aw_1041_cast_fp16")];
+            tensor<string, []> aw_1043_equation_0 = const()[name = tensor<string, []>("aw_1043_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1043_cast_fp16 = einsum(equation = aw_1043_equation_0, values = (var_7192_cast_fp16_1, var_7170_cast_fp16_1))[name = tensor<string, []>("aw_1043_cast_fp16")];
+            tensor<string, []> aw_1045_equation_0 = const()[name = tensor<string, []>("aw_1045_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1045_cast_fp16 = einsum(equation = aw_1045_equation_0, values = (var_7192_cast_fp16_2, var_7170_cast_fp16_2))[name = tensor<string, []>("aw_1045_cast_fp16")];
+            tensor<string, []> aw_1047_equation_0 = const()[name = tensor<string, []>("aw_1047_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1047_cast_fp16 = einsum(equation = aw_1047_equation_0, values = (var_7192_cast_fp16_3, var_7170_cast_fp16_3))[name = tensor<string, []>("aw_1047_cast_fp16")];
+            tensor<string, []> aw_1049_equation_0 = const()[name = tensor<string, []>("aw_1049_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1049_cast_fp16 = einsum(equation = aw_1049_equation_0, values = (var_7192_cast_fp16_4, var_7170_cast_fp16_4))[name = tensor<string, []>("aw_1049_cast_fp16")];
+            tensor<string, []> aw_1051_equation_0 = const()[name = tensor<string, []>("aw_1051_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1051_cast_fp16 = einsum(equation = aw_1051_equation_0, values = (var_7192_cast_fp16_5, var_7170_cast_fp16_5))[name = tensor<string, []>("aw_1051_cast_fp16")];
+            tensor<string, []> aw_1053_equation_0 = const()[name = tensor<string, []>("aw_1053_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1053_cast_fp16 = einsum(equation = aw_1053_equation_0, values = (var_7192_cast_fp16_6, var_7170_cast_fp16_6))[name = tensor<string, []>("aw_1053_cast_fp16")];
+            tensor<string, []> aw_1055_equation_0 = const()[name = tensor<string, []>("aw_1055_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1055_cast_fp16 = einsum(equation = aw_1055_equation_0, values = (var_7192_cast_fp16_7, var_7170_cast_fp16_7))[name = tensor<string, []>("aw_1055_cast_fp16")];
+            tensor<string, []> aw_1057_equation_0 = const()[name = tensor<string, []>("aw_1057_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1057_cast_fp16 = einsum(equation = aw_1057_equation_0, values = (var_7192_cast_fp16_8, var_7170_cast_fp16_8))[name = tensor<string, []>("aw_1057_cast_fp16")];
+            tensor<string, []> aw_1059_equation_0 = const()[name = tensor<string, []>("aw_1059_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1059_cast_fp16 = einsum(equation = aw_1059_equation_0, values = (var_7192_cast_fp16_9, var_7170_cast_fp16_9))[name = tensor<string, []>("aw_1059_cast_fp16")];
+            tensor<string, []> aw_1061_equation_0 = const()[name = tensor<string, []>("aw_1061_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1061_cast_fp16 = einsum(equation = aw_1061_equation_0, values = (var_7192_cast_fp16_10, var_7170_cast_fp16_10))[name = tensor<string, []>("aw_1061_cast_fp16")];
+            tensor<string, []> aw_1063_equation_0 = const()[name = tensor<string, []>("aw_1063_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1063_cast_fp16 = einsum(equation = aw_1063_equation_0, values = (var_7192_cast_fp16_11, var_7170_cast_fp16_11))[name = tensor<string, []>("aw_1063_cast_fp16")];
+            tensor<string, []> aw_1065_equation_0 = const()[name = tensor<string, []>("aw_1065_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1065_cast_fp16 = einsum(equation = aw_1065_equation_0, values = (var_7192_cast_fp16_12, var_7170_cast_fp16_12))[name = tensor<string, []>("aw_1065_cast_fp16")];
+            tensor<string, []> aw_1067_equation_0 = const()[name = tensor<string, []>("aw_1067_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1067_cast_fp16 = einsum(equation = aw_1067_equation_0, values = (var_7192_cast_fp16_13, var_7170_cast_fp16_13))[name = tensor<string, []>("aw_1067_cast_fp16")];
+            tensor<string, []> aw_1069_equation_0 = const()[name = tensor<string, []>("aw_1069_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1069_cast_fp16 = einsum(equation = aw_1069_equation_0, values = (var_7192_cast_fp16_14, var_7170_cast_fp16_14))[name = tensor<string, []>("aw_1069_cast_fp16")];
+            tensor<string, []> aw_1071_equation_0 = const()[name = tensor<string, []>("aw_1071_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1071_cast_fp16 = einsum(equation = aw_1071_equation_0, values = (var_7192_cast_fp16_15, var_7170_cast_fp16_15))[name = tensor<string, []>("aw_1071_cast_fp16")];
+            tensor<string, []> aw_1073_equation_0 = const()[name = tensor<string, []>("aw_1073_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1073_cast_fp16 = einsum(equation = aw_1073_equation_0, values = (var_7192_cast_fp16_16, var_7170_cast_fp16_16))[name = tensor<string, []>("aw_1073_cast_fp16")];
+            tensor<string, []> aw_1075_equation_0 = const()[name = tensor<string, []>("aw_1075_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1075_cast_fp16 = einsum(equation = aw_1075_equation_0, values = (var_7192_cast_fp16_17, var_7170_cast_fp16_17))[name = tensor<string, []>("aw_1075_cast_fp16")];
+            tensor<string, []> aw_1077_equation_0 = const()[name = tensor<string, []>("aw_1077_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1077_cast_fp16 = einsum(equation = aw_1077_equation_0, values = (var_7192_cast_fp16_18, var_7170_cast_fp16_18))[name = tensor<string, []>("aw_1077_cast_fp16")];
+            tensor<string, []> aw_1079_equation_0 = const()[name = tensor<string, []>("aw_1079_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1079_cast_fp16 = einsum(equation = aw_1079_equation_0, values = (var_7192_cast_fp16_19, var_7170_cast_fp16_19))[name = tensor<string, []>("aw_1079_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7274_cast_fp16 = softmax(axis = var_7118, x = aw_1041_cast_fp16)[name = tensor<string, []>("op_7274_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7275_cast_fp16 = softmax(axis = var_7118, x = aw_1043_cast_fp16)[name = tensor<string, []>("op_7275_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7276_cast_fp16 = softmax(axis = var_7118, x = aw_1045_cast_fp16)[name = tensor<string, []>("op_7276_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7277_cast_fp16 = softmax(axis = var_7118, x = aw_1047_cast_fp16)[name = tensor<string, []>("op_7277_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7278_cast_fp16 = softmax(axis = var_7118, x = aw_1049_cast_fp16)[name = tensor<string, []>("op_7278_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7279_cast_fp16 = softmax(axis = var_7118, x = aw_1051_cast_fp16)[name = tensor<string, []>("op_7279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7280_cast_fp16 = softmax(axis = var_7118, x = aw_1053_cast_fp16)[name = tensor<string, []>("op_7280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7281_cast_fp16 = softmax(axis = var_7118, x = aw_1055_cast_fp16)[name = tensor<string, []>("op_7281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7282_cast_fp16 = softmax(axis = var_7118, x = aw_1057_cast_fp16)[name = tensor<string, []>("op_7282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7283_cast_fp16 = softmax(axis = var_7118, x = aw_1059_cast_fp16)[name = tensor<string, []>("op_7283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7284_cast_fp16 = softmax(axis = var_7118, x = aw_1061_cast_fp16)[name = tensor<string, []>("op_7284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7285_cast_fp16 = softmax(axis = var_7118, x = aw_1063_cast_fp16)[name = tensor<string, []>("op_7285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7286_cast_fp16 = softmax(axis = var_7118, x = aw_1065_cast_fp16)[name = tensor<string, []>("op_7286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7287_cast_fp16 = softmax(axis = var_7118, x = aw_1067_cast_fp16)[name = tensor<string, []>("op_7287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7288_cast_fp16 = softmax(axis = var_7118, x = aw_1069_cast_fp16)[name = tensor<string, []>("op_7288_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7289_cast_fp16 = softmax(axis = var_7118, x = aw_1071_cast_fp16)[name = tensor<string, []>("op_7289_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7290_cast_fp16 = softmax(axis = var_7118, x = aw_1073_cast_fp16)[name = tensor<string, []>("op_7290_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7291_cast_fp16 = softmax(axis = var_7118, x = aw_1075_cast_fp16)[name = tensor<string, []>("op_7291_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7292_cast_fp16 = softmax(axis = var_7118, x = aw_1077_cast_fp16)[name = tensor<string, []>("op_7292_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7293_cast_fp16 = softmax(axis = var_7118, x = aw_1079_cast_fp16)[name = tensor<string, []>("op_7293_cast_fp16")];
+            tensor<string, []> var_7295_equation_0 = const()[name = tensor<string, []>("op_7295_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7295_cast_fp16 = einsum(equation = var_7295_equation_0, values = (var_7213_cast_fp16_0, var_7274_cast_fp16))[name = tensor<string, []>("op_7295_cast_fp16")];
+            tensor<string, []> var_7297_equation_0 = const()[name = tensor<string, []>("op_7297_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7297_cast_fp16 = einsum(equation = var_7297_equation_0, values = (var_7213_cast_fp16_1, var_7275_cast_fp16))[name = tensor<string, []>("op_7297_cast_fp16")];
+            tensor<string, []> var_7299_equation_0 = const()[name = tensor<string, []>("op_7299_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7299_cast_fp16 = einsum(equation = var_7299_equation_0, values = (var_7213_cast_fp16_2, var_7276_cast_fp16))[name = tensor<string, []>("op_7299_cast_fp16")];
+            tensor<string, []> var_7301_equation_0 = const()[name = tensor<string, []>("op_7301_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7301_cast_fp16 = einsum(equation = var_7301_equation_0, values = (var_7213_cast_fp16_3, var_7277_cast_fp16))[name = tensor<string, []>("op_7301_cast_fp16")];
+            tensor<string, []> var_7303_equation_0 = const()[name = tensor<string, []>("op_7303_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7303_cast_fp16 = einsum(equation = var_7303_equation_0, values = (var_7213_cast_fp16_4, var_7278_cast_fp16))[name = tensor<string, []>("op_7303_cast_fp16")];
+            tensor<string, []> var_7305_equation_0 = const()[name = tensor<string, []>("op_7305_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7305_cast_fp16 = einsum(equation = var_7305_equation_0, values = (var_7213_cast_fp16_5, var_7279_cast_fp16))[name = tensor<string, []>("op_7305_cast_fp16")];
+            tensor<string, []> var_7307_equation_0 = const()[name = tensor<string, []>("op_7307_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7307_cast_fp16 = einsum(equation = var_7307_equation_0, values = (var_7213_cast_fp16_6, var_7280_cast_fp16))[name = tensor<string, []>("op_7307_cast_fp16")];
+            tensor<string, []> var_7309_equation_0 = const()[name = tensor<string, []>("op_7309_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7309_cast_fp16 = einsum(equation = var_7309_equation_0, values = (var_7213_cast_fp16_7, var_7281_cast_fp16))[name = tensor<string, []>("op_7309_cast_fp16")];
+            tensor<string, []> var_7311_equation_0 = const()[name = tensor<string, []>("op_7311_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7311_cast_fp16 = einsum(equation = var_7311_equation_0, values = (var_7213_cast_fp16_8, var_7282_cast_fp16))[name = tensor<string, []>("op_7311_cast_fp16")];
+            tensor<string, []> var_7313_equation_0 = const()[name = tensor<string, []>("op_7313_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7313_cast_fp16 = einsum(equation = var_7313_equation_0, values = (var_7213_cast_fp16_9, var_7283_cast_fp16))[name = tensor<string, []>("op_7313_cast_fp16")];
+            tensor<string, []> var_7315_equation_0 = const()[name = tensor<string, []>("op_7315_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7315_cast_fp16 = einsum(equation = var_7315_equation_0, values = (var_7213_cast_fp16_10, var_7284_cast_fp16))[name = tensor<string, []>("op_7315_cast_fp16")];
+            tensor<string, []> var_7317_equation_0 = const()[name = tensor<string, []>("op_7317_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7317_cast_fp16 = einsum(equation = var_7317_equation_0, values = (var_7213_cast_fp16_11, var_7285_cast_fp16))[name = tensor<string, []>("op_7317_cast_fp16")];
+            tensor<string, []> var_7319_equation_0 = const()[name = tensor<string, []>("op_7319_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7319_cast_fp16 = einsum(equation = var_7319_equation_0, values = (var_7213_cast_fp16_12, var_7286_cast_fp16))[name = tensor<string, []>("op_7319_cast_fp16")];
+            tensor<string, []> var_7321_equation_0 = const()[name = tensor<string, []>("op_7321_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7321_cast_fp16 = einsum(equation = var_7321_equation_0, values = (var_7213_cast_fp16_13, var_7287_cast_fp16))[name = tensor<string, []>("op_7321_cast_fp16")];
+            tensor<string, []> var_7323_equation_0 = const()[name = tensor<string, []>("op_7323_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7323_cast_fp16 = einsum(equation = var_7323_equation_0, values = (var_7213_cast_fp16_14, var_7288_cast_fp16))[name = tensor<string, []>("op_7323_cast_fp16")];
+            tensor<string, []> var_7325_equation_0 = const()[name = tensor<string, []>("op_7325_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7325_cast_fp16 = einsum(equation = var_7325_equation_0, values = (var_7213_cast_fp16_15, var_7289_cast_fp16))[name = tensor<string, []>("op_7325_cast_fp16")];
+            tensor<string, []> var_7327_equation_0 = const()[name = tensor<string, []>("op_7327_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7327_cast_fp16 = einsum(equation = var_7327_equation_0, values = (var_7213_cast_fp16_16, var_7290_cast_fp16))[name = tensor<string, []>("op_7327_cast_fp16")];
+            tensor<string, []> var_7329_equation_0 = const()[name = tensor<string, []>("op_7329_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7329_cast_fp16 = einsum(equation = var_7329_equation_0, values = (var_7213_cast_fp16_17, var_7291_cast_fp16))[name = tensor<string, []>("op_7329_cast_fp16")];
+            tensor<string, []> var_7331_equation_0 = const()[name = tensor<string, []>("op_7331_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7331_cast_fp16 = einsum(equation = var_7331_equation_0, values = (var_7213_cast_fp16_18, var_7292_cast_fp16))[name = tensor<string, []>("op_7331_cast_fp16")];
+            tensor<string, []> var_7333_equation_0 = const()[name = tensor<string, []>("op_7333_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7333_cast_fp16 = einsum(equation = var_7333_equation_0, values = (var_7213_cast_fp16_19, var_7293_cast_fp16))[name = tensor<string, []>("op_7333_cast_fp16")];
+            tensor<bool, []> input_265_interleave_0 = const()[name = tensor<string, []>("input_265_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_265_cast_fp16 = concat(axis = var_7118, interleave = input_265_interleave_0, values = (var_7295_cast_fp16, var_7297_cast_fp16, var_7299_cast_fp16, var_7301_cast_fp16, var_7303_cast_fp16, var_7305_cast_fp16, var_7307_cast_fp16, var_7309_cast_fp16, var_7311_cast_fp16, var_7313_cast_fp16, var_7315_cast_fp16, var_7317_cast_fp16, var_7319_cast_fp16, var_7321_cast_fp16, var_7323_cast_fp16, var_7325_cast_fp16, var_7327_cast_fp16, var_7329_cast_fp16, var_7331_cast_fp16, var_7333_cast_fp16))[name = tensor<string, []>("input_265_cast_fp16")];
+            tensor<string, []> var_7342_pad_type_0 = const()[name = tensor<string, []>("op_7342_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7342_strides_0 = const()[name = tensor<string, []>("op_7342_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7342_pad_0 = const()[name = tensor<string, []>("op_7342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7342_dilations_0 = const()[name = tensor<string, []>("op_7342_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7342_groups_0 = const()[name = tensor<string, []>("op_7342_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_26_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1047685312)))];
+            tensor<fp16, [1280]> blocks_26_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050962176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7342_cast_fp16 = conv(bias = blocks_26_attn_out_bias_to_fp16, dilations = var_7342_dilations_0, groups = var_7342_groups_0, pad = var_7342_pad_0, pad_type = var_7342_pad_type_0, strides = var_7342_strides_0, weight = blocks_26_attn_out_weight_to_fp16, x = input_265_cast_fp16)[name = tensor<string, []>("op_7342_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = var_7342_cast_fp16)[name = tensor<string, []>("inputs_107_cast_fp16")];
+            tensor<int32, [1]> input_267_axes_0 = const()[name = tensor<string, []>("input_267_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_267_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_267_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050964800)))];
+            tensor<fp16, [1280]> input_267_beta_0_to_fp16 = const()[name = tensor<string, []>("input_267_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050967424)))];
+            tensor<fp16, []> var_7352_to_fp16 = const()[name = tensor<string, []>("op_7352_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_267_cast_fp16 = layer_norm(axes = input_267_axes_0, beta = input_267_beta_0_to_fp16, epsilon = var_7352_to_fp16, gamma = input_267_gamma_0_to_fp16, x = inputs_107_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
+            tensor<string, []> input_269_pad_type_0 = const()[name = tensor<string, []>("input_269_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_269_strides_0 = const()[name = tensor<string, []>("input_269_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_269_pad_0 = const()[name = tensor<string, []>("input_269_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_269_dilations_0 = const()[name = tensor<string, []>("input_269_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_269_groups_0 = const()[name = tensor<string, []>("input_269_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_26_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1050970048)))];
+            tensor<fp16, [5120]> blocks_26_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1064077312)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_269_cast_fp16 = conv(bias = blocks_26_mlp_0_bias_to_fp16, dilations = input_269_dilations_0, groups = input_269_groups_0, pad = input_269_pad_0, pad_type = input_269_pad_type_0, strides = input_269_strides_0, weight = blocks_26_mlp_0_weight_to_fp16, x = input_267_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
+            tensor<string, []> input_271_mode_0 = const()[name = tensor<string, []>("input_271_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_271_cast_fp16 = gelu(mode = input_271_mode_0, x = input_269_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
+            tensor<string, []> var_7378_pad_type_0 = const()[name = tensor<string, []>("op_7378_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7378_strides_0 = const()[name = tensor<string, []>("op_7378_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7378_pad_0 = const()[name = tensor<string, []>("op_7378_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7378_dilations_0 = const()[name = tensor<string, []>("op_7378_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7378_groups_0 = const()[name = tensor<string, []>("op_7378_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_26_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1064087616)))];
+            tensor<fp16, [1280]> blocks_26_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_26_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1077194880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7378_cast_fp16 = conv(bias = blocks_26_mlp_2_bias_to_fp16, dilations = var_7378_dilations_0, groups = var_7378_groups_0, pad = var_7378_pad_0, pad_type = var_7378_pad_type_0, strides = var_7378_strides_0, weight = blocks_26_mlp_2_weight_to_fp16, x = input_271_cast_fp16)[name = tensor<string, []>("op_7378_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = var_7378_cast_fp16)[name = tensor<string, []>("inputs_109_cast_fp16")];
+            tensor<int32, []> var_7387 = const()[name = tensor<string, []>("op_7387"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_273_axes_0 = const()[name = tensor<string, []>("input_273_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_273_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_273_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1077197504)))];
+            tensor<fp16, [1280]> input_273_beta_0_to_fp16 = const()[name = tensor<string, []>("input_273_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1077200128)))];
+            tensor<fp16, []> var_7403_to_fp16 = const()[name = tensor<string, []>("op_7403_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_273_cast_fp16 = layer_norm(axes = input_273_axes_0, beta = input_273_beta_0_to_fp16, epsilon = var_7403_to_fp16, gamma = input_273_gamma_0_to_fp16, x = inputs_109_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
+            tensor<string, []> q_55_pad_type_0 = const()[name = tensor<string, []>("q_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_55_strides_0 = const()[name = tensor<string, []>("q_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_55_pad_0 = const()[name = tensor<string, []>("q_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_55_dilations_0 = const()[name = tensor<string, []>("q_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_55_groups_0 = const()[name = tensor<string, []>("q_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7438_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7438_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1077202752)))];
+            tensor<fp16, [1280]> var_7438_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7438_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080479616)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7438_cast_fp16 = conv(bias = var_7438_bias_0_to_fp16, dilations = q_55_dilations_0, groups = q_55_groups_0, pad = q_55_pad_0, pad_type = q_55_pad_type_0, strides = q_55_strides_0, weight = var_7438_weight_0_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7438_cast_fp16")];
+            tensor<string, []> k_55_pad_type_0 = const()[name = tensor<string, []>("k_55_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_55_strides_0 = const()[name = tensor<string, []>("k_55_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_55_pad_0 = const()[name = tensor<string, []>("k_55_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_55_dilations_0 = const()[name = tensor<string, []>("k_55_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_55_groups_0 = const()[name = tensor<string, []>("k_55_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1080482240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_55_cast_fp16 = conv(dilations = k_55_dilations_0, groups = k_55_groups_0, pad = k_55_pad_0, pad_type = k_55_pad_type_0, strides = k_55_strides_0, weight = blocks_27_attn_key_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("k_55_cast_fp16")];
+            tensor<string, []> var_7436_pad_type_0 = const()[name = tensor<string, []>("op_7436_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7436_strides_0 = const()[name = tensor<string, []>("op_7436_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7436_pad_0 = const()[name = tensor<string, []>("op_7436_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7436_dilations_0 = const()[name = tensor<string, []>("op_7436_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7436_groups_0 = const()[name = tensor<string, []>("op_7436_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1083759104)))];
+            tensor<fp16, [1280]> blocks_27_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1087035968)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7436_cast_fp16 = conv(bias = blocks_27_attn_value_bias_to_fp16, dilations = var_7436_dilations_0, groups = var_7436_groups_0, pad = var_7436_pad_0, pad_type = var_7436_pad_type_0, strides = var_7436_strides_0, weight = blocks_27_attn_value_weight_to_fp16, x = input_273_cast_fp16)[name = tensor<string, []>("op_7436_cast_fp16")];
+            tensor<int32, [20]> tile_81 = const()[name = tensor<string, []>("tile_81"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7439_axis_0 = const()[name = tensor<string, []>("op_7439_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7439_cast_fp16_19 = split(axis = var_7439_axis_0, split_sizes = tile_81, x = var_7438_cast_fp16)[name = tensor<string, []>("op_7439_cast_fp16")];
+            tensor<int32, [4]> var_7460_perm_0 = const()[name = tensor<string, []>("op_7460_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_82 = const()[name = tensor<string, []>("tile_82"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7461_axis_0 = const()[name = tensor<string, []>("op_7461_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7460_cast_fp16 = transpose(perm = var_7460_perm_0, x = k_55_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7461_cast_fp16_19 = split(axis = var_7461_axis_0, split_sizes = tile_82, x = var_7460_cast_fp16)[name = tensor<string, []>("op_7461_cast_fp16")];
+            tensor<int32, [20]> tile_83 = const()[name = tensor<string, []>("tile_83"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7482_axis_0 = const()[name = tensor<string, []>("op_7482_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7482_cast_fp16_19 = split(axis = var_7482_axis_0, split_sizes = tile_83, x = var_7436_cast_fp16)[name = tensor<string, []>("op_7482_cast_fp16")];
+            tensor<string, []> aw_1081_equation_0 = const()[name = tensor<string, []>("aw_1081_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1081_cast_fp16 = einsum(equation = aw_1081_equation_0, values = (var_7461_cast_fp16_0, var_7439_cast_fp16_0))[name = tensor<string, []>("aw_1081_cast_fp16")];
+            tensor<string, []> aw_1083_equation_0 = const()[name = tensor<string, []>("aw_1083_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1083_cast_fp16 = einsum(equation = aw_1083_equation_0, values = (var_7461_cast_fp16_1, var_7439_cast_fp16_1))[name = tensor<string, []>("aw_1083_cast_fp16")];
+            tensor<string, []> aw_1085_equation_0 = const()[name = tensor<string, []>("aw_1085_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1085_cast_fp16 = einsum(equation = aw_1085_equation_0, values = (var_7461_cast_fp16_2, var_7439_cast_fp16_2))[name = tensor<string, []>("aw_1085_cast_fp16")];
+            tensor<string, []> aw_1087_equation_0 = const()[name = tensor<string, []>("aw_1087_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1087_cast_fp16 = einsum(equation = aw_1087_equation_0, values = (var_7461_cast_fp16_3, var_7439_cast_fp16_3))[name = tensor<string, []>("aw_1087_cast_fp16")];
+            tensor<string, []> aw_1089_equation_0 = const()[name = tensor<string, []>("aw_1089_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1089_cast_fp16 = einsum(equation = aw_1089_equation_0, values = (var_7461_cast_fp16_4, var_7439_cast_fp16_4))[name = tensor<string, []>("aw_1089_cast_fp16")];
+            tensor<string, []> aw_1091_equation_0 = const()[name = tensor<string, []>("aw_1091_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1091_cast_fp16 = einsum(equation = aw_1091_equation_0, values = (var_7461_cast_fp16_5, var_7439_cast_fp16_5))[name = tensor<string, []>("aw_1091_cast_fp16")];
+            tensor<string, []> aw_1093_equation_0 = const()[name = tensor<string, []>("aw_1093_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1093_cast_fp16 = einsum(equation = aw_1093_equation_0, values = (var_7461_cast_fp16_6, var_7439_cast_fp16_6))[name = tensor<string, []>("aw_1093_cast_fp16")];
+            tensor<string, []> aw_1095_equation_0 = const()[name = tensor<string, []>("aw_1095_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1095_cast_fp16 = einsum(equation = aw_1095_equation_0, values = (var_7461_cast_fp16_7, var_7439_cast_fp16_7))[name = tensor<string, []>("aw_1095_cast_fp16")];
+            tensor<string, []> aw_1097_equation_0 = const()[name = tensor<string, []>("aw_1097_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1097_cast_fp16 = einsum(equation = aw_1097_equation_0, values = (var_7461_cast_fp16_8, var_7439_cast_fp16_8))[name = tensor<string, []>("aw_1097_cast_fp16")];
+            tensor<string, []> aw_1099_equation_0 = const()[name = tensor<string, []>("aw_1099_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1099_cast_fp16 = einsum(equation = aw_1099_equation_0, values = (var_7461_cast_fp16_9, var_7439_cast_fp16_9))[name = tensor<string, []>("aw_1099_cast_fp16")];
+            tensor<string, []> aw_1101_equation_0 = const()[name = tensor<string, []>("aw_1101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1101_cast_fp16 = einsum(equation = aw_1101_equation_0, values = (var_7461_cast_fp16_10, var_7439_cast_fp16_10))[name = tensor<string, []>("aw_1101_cast_fp16")];
+            tensor<string, []> aw_1103_equation_0 = const()[name = tensor<string, []>("aw_1103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1103_cast_fp16 = einsum(equation = aw_1103_equation_0, values = (var_7461_cast_fp16_11, var_7439_cast_fp16_11))[name = tensor<string, []>("aw_1103_cast_fp16")];
+            tensor<string, []> aw_1105_equation_0 = const()[name = tensor<string, []>("aw_1105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1105_cast_fp16 = einsum(equation = aw_1105_equation_0, values = (var_7461_cast_fp16_12, var_7439_cast_fp16_12))[name = tensor<string, []>("aw_1105_cast_fp16")];
+            tensor<string, []> aw_1107_equation_0 = const()[name = tensor<string, []>("aw_1107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1107_cast_fp16 = einsum(equation = aw_1107_equation_0, values = (var_7461_cast_fp16_13, var_7439_cast_fp16_13))[name = tensor<string, []>("aw_1107_cast_fp16")];
+            tensor<string, []> aw_1109_equation_0 = const()[name = tensor<string, []>("aw_1109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1109_cast_fp16 = einsum(equation = aw_1109_equation_0, values = (var_7461_cast_fp16_14, var_7439_cast_fp16_14))[name = tensor<string, []>("aw_1109_cast_fp16")];
+            tensor<string, []> aw_1111_equation_0 = const()[name = tensor<string, []>("aw_1111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1111_cast_fp16 = einsum(equation = aw_1111_equation_0, values = (var_7461_cast_fp16_15, var_7439_cast_fp16_15))[name = tensor<string, []>("aw_1111_cast_fp16")];
+            tensor<string, []> aw_1113_equation_0 = const()[name = tensor<string, []>("aw_1113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1113_cast_fp16 = einsum(equation = aw_1113_equation_0, values = (var_7461_cast_fp16_16, var_7439_cast_fp16_16))[name = tensor<string, []>("aw_1113_cast_fp16")];
+            tensor<string, []> aw_1115_equation_0 = const()[name = tensor<string, []>("aw_1115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1115_cast_fp16 = einsum(equation = aw_1115_equation_0, values = (var_7461_cast_fp16_17, var_7439_cast_fp16_17))[name = tensor<string, []>("aw_1115_cast_fp16")];
+            tensor<string, []> aw_1117_equation_0 = const()[name = tensor<string, []>("aw_1117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1117_cast_fp16 = einsum(equation = aw_1117_equation_0, values = (var_7461_cast_fp16_18, var_7439_cast_fp16_18))[name = tensor<string, []>("aw_1117_cast_fp16")];
+            tensor<string, []> aw_1119_equation_0 = const()[name = tensor<string, []>("aw_1119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1119_cast_fp16 = einsum(equation = aw_1119_equation_0, values = (var_7461_cast_fp16_19, var_7439_cast_fp16_19))[name = tensor<string, []>("aw_1119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7543_cast_fp16 = softmax(axis = var_7387, x = aw_1081_cast_fp16)[name = tensor<string, []>("op_7543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7544_cast_fp16 = softmax(axis = var_7387, x = aw_1083_cast_fp16)[name = tensor<string, []>("op_7544_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7545_cast_fp16 = softmax(axis = var_7387, x = aw_1085_cast_fp16)[name = tensor<string, []>("op_7545_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7546_cast_fp16 = softmax(axis = var_7387, x = aw_1087_cast_fp16)[name = tensor<string, []>("op_7546_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7547_cast_fp16 = softmax(axis = var_7387, x = aw_1089_cast_fp16)[name = tensor<string, []>("op_7547_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7548_cast_fp16 = softmax(axis = var_7387, x = aw_1091_cast_fp16)[name = tensor<string, []>("op_7548_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7549_cast_fp16 = softmax(axis = var_7387, x = aw_1093_cast_fp16)[name = tensor<string, []>("op_7549_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7550_cast_fp16 = softmax(axis = var_7387, x = aw_1095_cast_fp16)[name = tensor<string, []>("op_7550_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7551_cast_fp16 = softmax(axis = var_7387, x = aw_1097_cast_fp16)[name = tensor<string, []>("op_7551_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7552_cast_fp16 = softmax(axis = var_7387, x = aw_1099_cast_fp16)[name = tensor<string, []>("op_7552_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7553_cast_fp16 = softmax(axis = var_7387, x = aw_1101_cast_fp16)[name = tensor<string, []>("op_7553_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7554_cast_fp16 = softmax(axis = var_7387, x = aw_1103_cast_fp16)[name = tensor<string, []>("op_7554_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7555_cast_fp16 = softmax(axis = var_7387, x = aw_1105_cast_fp16)[name = tensor<string, []>("op_7555_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7556_cast_fp16 = softmax(axis = var_7387, x = aw_1107_cast_fp16)[name = tensor<string, []>("op_7556_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7557_cast_fp16 = softmax(axis = var_7387, x = aw_1109_cast_fp16)[name = tensor<string, []>("op_7557_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7558_cast_fp16 = softmax(axis = var_7387, x = aw_1111_cast_fp16)[name = tensor<string, []>("op_7558_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7559_cast_fp16 = softmax(axis = var_7387, x = aw_1113_cast_fp16)[name = tensor<string, []>("op_7559_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7560_cast_fp16 = softmax(axis = var_7387, x = aw_1115_cast_fp16)[name = tensor<string, []>("op_7560_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7561_cast_fp16 = softmax(axis = var_7387, x = aw_1117_cast_fp16)[name = tensor<string, []>("op_7561_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7562_cast_fp16 = softmax(axis = var_7387, x = aw_1119_cast_fp16)[name = tensor<string, []>("op_7562_cast_fp16")];
+            tensor<string, []> var_7564_equation_0 = const()[name = tensor<string, []>("op_7564_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7564_cast_fp16 = einsum(equation = var_7564_equation_0, values = (var_7482_cast_fp16_0, var_7543_cast_fp16))[name = tensor<string, []>("op_7564_cast_fp16")];
+            tensor<string, []> var_7566_equation_0 = const()[name = tensor<string, []>("op_7566_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7566_cast_fp16 = einsum(equation = var_7566_equation_0, values = (var_7482_cast_fp16_1, var_7544_cast_fp16))[name = tensor<string, []>("op_7566_cast_fp16")];
+            tensor<string, []> var_7568_equation_0 = const()[name = tensor<string, []>("op_7568_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7568_cast_fp16 = einsum(equation = var_7568_equation_0, values = (var_7482_cast_fp16_2, var_7545_cast_fp16))[name = tensor<string, []>("op_7568_cast_fp16")];
+            tensor<string, []> var_7570_equation_0 = const()[name = tensor<string, []>("op_7570_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7570_cast_fp16 = einsum(equation = var_7570_equation_0, values = (var_7482_cast_fp16_3, var_7546_cast_fp16))[name = tensor<string, []>("op_7570_cast_fp16")];
+            tensor<string, []> var_7572_equation_0 = const()[name = tensor<string, []>("op_7572_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7572_cast_fp16 = einsum(equation = var_7572_equation_0, values = (var_7482_cast_fp16_4, var_7547_cast_fp16))[name = tensor<string, []>("op_7572_cast_fp16")];
+            tensor<string, []> var_7574_equation_0 = const()[name = tensor<string, []>("op_7574_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7574_cast_fp16 = einsum(equation = var_7574_equation_0, values = (var_7482_cast_fp16_5, var_7548_cast_fp16))[name = tensor<string, []>("op_7574_cast_fp16")];
+            tensor<string, []> var_7576_equation_0 = const()[name = tensor<string, []>("op_7576_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7576_cast_fp16 = einsum(equation = var_7576_equation_0, values = (var_7482_cast_fp16_6, var_7549_cast_fp16))[name = tensor<string, []>("op_7576_cast_fp16")];
+            tensor<string, []> var_7578_equation_0 = const()[name = tensor<string, []>("op_7578_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7578_cast_fp16 = einsum(equation = var_7578_equation_0, values = (var_7482_cast_fp16_7, var_7550_cast_fp16))[name = tensor<string, []>("op_7578_cast_fp16")];
+            tensor<string, []> var_7580_equation_0 = const()[name = tensor<string, []>("op_7580_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7580_cast_fp16 = einsum(equation = var_7580_equation_0, values = (var_7482_cast_fp16_8, var_7551_cast_fp16))[name = tensor<string, []>("op_7580_cast_fp16")];
+            tensor<string, []> var_7582_equation_0 = const()[name = tensor<string, []>("op_7582_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7582_cast_fp16 = einsum(equation = var_7582_equation_0, values = (var_7482_cast_fp16_9, var_7552_cast_fp16))[name = tensor<string, []>("op_7582_cast_fp16")];
+            tensor<string, []> var_7584_equation_0 = const()[name = tensor<string, []>("op_7584_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7584_cast_fp16 = einsum(equation = var_7584_equation_0, values = (var_7482_cast_fp16_10, var_7553_cast_fp16))[name = tensor<string, []>("op_7584_cast_fp16")];
+            tensor<string, []> var_7586_equation_0 = const()[name = tensor<string, []>("op_7586_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7586_cast_fp16 = einsum(equation = var_7586_equation_0, values = (var_7482_cast_fp16_11, var_7554_cast_fp16))[name = tensor<string, []>("op_7586_cast_fp16")];
+            tensor<string, []> var_7588_equation_0 = const()[name = tensor<string, []>("op_7588_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7588_cast_fp16 = einsum(equation = var_7588_equation_0, values = (var_7482_cast_fp16_12, var_7555_cast_fp16))[name = tensor<string, []>("op_7588_cast_fp16")];
+            tensor<string, []> var_7590_equation_0 = const()[name = tensor<string, []>("op_7590_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7590_cast_fp16 = einsum(equation = var_7590_equation_0, values = (var_7482_cast_fp16_13, var_7556_cast_fp16))[name = tensor<string, []>("op_7590_cast_fp16")];
+            tensor<string, []> var_7592_equation_0 = const()[name = tensor<string, []>("op_7592_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7592_cast_fp16 = einsum(equation = var_7592_equation_0, values = (var_7482_cast_fp16_14, var_7557_cast_fp16))[name = tensor<string, []>("op_7592_cast_fp16")];
+            tensor<string, []> var_7594_equation_0 = const()[name = tensor<string, []>("op_7594_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7594_cast_fp16 = einsum(equation = var_7594_equation_0, values = (var_7482_cast_fp16_15, var_7558_cast_fp16))[name = tensor<string, []>("op_7594_cast_fp16")];
+            tensor<string, []> var_7596_equation_0 = const()[name = tensor<string, []>("op_7596_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7596_cast_fp16 = einsum(equation = var_7596_equation_0, values = (var_7482_cast_fp16_16, var_7559_cast_fp16))[name = tensor<string, []>("op_7596_cast_fp16")];
+            tensor<string, []> var_7598_equation_0 = const()[name = tensor<string, []>("op_7598_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7598_cast_fp16 = einsum(equation = var_7598_equation_0, values = (var_7482_cast_fp16_17, var_7560_cast_fp16))[name = tensor<string, []>("op_7598_cast_fp16")];
+            tensor<string, []> var_7600_equation_0 = const()[name = tensor<string, []>("op_7600_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7600_cast_fp16 = einsum(equation = var_7600_equation_0, values = (var_7482_cast_fp16_18, var_7561_cast_fp16))[name = tensor<string, []>("op_7600_cast_fp16")];
+            tensor<string, []> var_7602_equation_0 = const()[name = tensor<string, []>("op_7602_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7602_cast_fp16 = einsum(equation = var_7602_equation_0, values = (var_7482_cast_fp16_19, var_7562_cast_fp16))[name = tensor<string, []>("op_7602_cast_fp16")];
+            tensor<bool, []> input_275_interleave_0 = const()[name = tensor<string, []>("input_275_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_275_cast_fp16 = concat(axis = var_7387, interleave = input_275_interleave_0, values = (var_7564_cast_fp16, var_7566_cast_fp16, var_7568_cast_fp16, var_7570_cast_fp16, var_7572_cast_fp16, var_7574_cast_fp16, var_7576_cast_fp16, var_7578_cast_fp16, var_7580_cast_fp16, var_7582_cast_fp16, var_7584_cast_fp16, var_7586_cast_fp16, var_7588_cast_fp16, var_7590_cast_fp16, var_7592_cast_fp16, var_7594_cast_fp16, var_7596_cast_fp16, var_7598_cast_fp16, var_7600_cast_fp16, var_7602_cast_fp16))[name = tensor<string, []>("input_275_cast_fp16")];
+            tensor<string, []> var_7611_pad_type_0 = const()[name = tensor<string, []>("op_7611_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7611_strides_0 = const()[name = tensor<string, []>("op_7611_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7611_pad_0 = const()[name = tensor<string, []>("op_7611_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7611_dilations_0 = const()[name = tensor<string, []>("op_7611_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7611_groups_0 = const()[name = tensor<string, []>("op_7611_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_27_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1087038592)))];
+            tensor<fp16, [1280]> blocks_27_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1090315456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7611_cast_fp16 = conv(bias = blocks_27_attn_out_bias_to_fp16, dilations = var_7611_dilations_0, groups = var_7611_groups_0, pad = var_7611_pad_0, pad_type = var_7611_pad_type_0, strides = var_7611_strides_0, weight = blocks_27_attn_out_weight_to_fp16, x = input_275_cast_fp16)[name = tensor<string, []>("op_7611_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = var_7611_cast_fp16)[name = tensor<string, []>("inputs_111_cast_fp16")];
+            tensor<int32, [1]> input_277_axes_0 = const()[name = tensor<string, []>("input_277_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_277_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_277_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1090318080)))];
+            tensor<fp16, [1280]> input_277_beta_0_to_fp16 = const()[name = tensor<string, []>("input_277_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1090320704)))];
+            tensor<fp16, []> var_7621_to_fp16 = const()[name = tensor<string, []>("op_7621_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_277_cast_fp16 = layer_norm(axes = input_277_axes_0, beta = input_277_beta_0_to_fp16, epsilon = var_7621_to_fp16, gamma = input_277_gamma_0_to_fp16, x = inputs_111_cast_fp16)[name = tensor<string, []>("input_277_cast_fp16")];
+            tensor<string, []> input_279_pad_type_0 = const()[name = tensor<string, []>("input_279_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_279_strides_0 = const()[name = tensor<string, []>("input_279_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_279_pad_0 = const()[name = tensor<string, []>("input_279_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_279_dilations_0 = const()[name = tensor<string, []>("input_279_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_279_groups_0 = const()[name = tensor<string, []>("input_279_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_27_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1090323328)))];
+            tensor<fp16, [5120]> blocks_27_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103430592)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_279_cast_fp16 = conv(bias = blocks_27_mlp_0_bias_to_fp16, dilations = input_279_dilations_0, groups = input_279_groups_0, pad = input_279_pad_0, pad_type = input_279_pad_type_0, strides = input_279_strides_0, weight = blocks_27_mlp_0_weight_to_fp16, x = input_277_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
+            tensor<string, []> input_281_mode_0 = const()[name = tensor<string, []>("input_281_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_281_cast_fp16 = gelu(mode = input_281_mode_0, x = input_279_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
+            tensor<string, []> var_7647_pad_type_0 = const()[name = tensor<string, []>("op_7647_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7647_strides_0 = const()[name = tensor<string, []>("op_7647_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7647_pad_0 = const()[name = tensor<string, []>("op_7647_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7647_dilations_0 = const()[name = tensor<string, []>("op_7647_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7647_groups_0 = const()[name = tensor<string, []>("op_7647_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_27_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1103440896)))];
+            tensor<fp16, [1280]> blocks_27_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_27_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116548160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7647_cast_fp16 = conv(bias = blocks_27_mlp_2_bias_to_fp16, dilations = var_7647_dilations_0, groups = var_7647_groups_0, pad = var_7647_pad_0, pad_type = var_7647_pad_type_0, strides = var_7647_strides_0, weight = blocks_27_mlp_2_weight_to_fp16, x = input_281_cast_fp16)[name = tensor<string, []>("op_7647_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = var_7647_cast_fp16)[name = tensor<string, []>("inputs_113_cast_fp16")];
+            tensor<int32, []> var_7656 = const()[name = tensor<string, []>("op_7656"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_283_axes_0 = const()[name = tensor<string, []>("input_283_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_283_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_283_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116550784)))];
+            tensor<fp16, [1280]> input_283_beta_0_to_fp16 = const()[name = tensor<string, []>("input_283_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116553408)))];
+            tensor<fp16, []> var_7672_to_fp16 = const()[name = tensor<string, []>("op_7672_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_283_cast_fp16 = layer_norm(axes = input_283_axes_0, beta = input_283_beta_0_to_fp16, epsilon = var_7672_to_fp16, gamma = input_283_gamma_0_to_fp16, x = inputs_113_cast_fp16)[name = tensor<string, []>("input_283_cast_fp16")];
+            tensor<string, []> q_57_pad_type_0 = const()[name = tensor<string, []>("q_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_57_strides_0 = const()[name = tensor<string, []>("q_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_57_pad_0 = const()[name = tensor<string, []>("q_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_57_dilations_0 = const()[name = tensor<string, []>("q_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_57_groups_0 = const()[name = tensor<string, []>("q_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7707_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7707_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1116556032)))];
+            tensor<fp16, [1280]> var_7707_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7707_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119832896)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7707_cast_fp16 = conv(bias = var_7707_bias_0_to_fp16, dilations = q_57_dilations_0, groups = q_57_groups_0, pad = q_57_pad_0, pad_type = q_57_pad_type_0, strides = q_57_strides_0, weight = var_7707_weight_0_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7707_cast_fp16")];
+            tensor<string, []> k_57_pad_type_0 = const()[name = tensor<string, []>("k_57_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_57_strides_0 = const()[name = tensor<string, []>("k_57_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_57_pad_0 = const()[name = tensor<string, []>("k_57_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_57_dilations_0 = const()[name = tensor<string, []>("k_57_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_57_groups_0 = const()[name = tensor<string, []>("k_57_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1119835520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_57_cast_fp16 = conv(dilations = k_57_dilations_0, groups = k_57_groups_0, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = k_57_strides_0, weight = blocks_28_attn_key_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("k_57_cast_fp16")];
+            tensor<string, []> var_7705_pad_type_0 = const()[name = tensor<string, []>("op_7705_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7705_strides_0 = const()[name = tensor<string, []>("op_7705_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7705_pad_0 = const()[name = tensor<string, []>("op_7705_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7705_dilations_0 = const()[name = tensor<string, []>("op_7705_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7705_groups_0 = const()[name = tensor<string, []>("op_7705_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1123112384)))];
+            tensor<fp16, [1280]> blocks_28_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126389248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7705_cast_fp16 = conv(bias = blocks_28_attn_value_bias_to_fp16, dilations = var_7705_dilations_0, groups = var_7705_groups_0, pad = var_7705_pad_0, pad_type = var_7705_pad_type_0, strides = var_7705_strides_0, weight = blocks_28_attn_value_weight_to_fp16, x = input_283_cast_fp16)[name = tensor<string, []>("op_7705_cast_fp16")];
+            tensor<int32, [20]> tile_84 = const()[name = tensor<string, []>("tile_84"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7708_axis_0 = const()[name = tensor<string, []>("op_7708_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7708_cast_fp16_19 = split(axis = var_7708_axis_0, split_sizes = tile_84, x = var_7707_cast_fp16)[name = tensor<string, []>("op_7708_cast_fp16")];
+            tensor<int32, [4]> var_7729_perm_0 = const()[name = tensor<string, []>("op_7729_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_85 = const()[name = tensor<string, []>("tile_85"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7730_axis_0 = const()[name = tensor<string, []>("op_7730_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7729_cast_fp16 = transpose(perm = var_7729_perm_0, x = k_57_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7730_cast_fp16_19 = split(axis = var_7730_axis_0, split_sizes = tile_85, x = var_7729_cast_fp16)[name = tensor<string, []>("op_7730_cast_fp16")];
+            tensor<int32, [20]> tile_86 = const()[name = tensor<string, []>("tile_86"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7751_axis_0 = const()[name = tensor<string, []>("op_7751_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7751_cast_fp16_19 = split(axis = var_7751_axis_0, split_sizes = tile_86, x = var_7705_cast_fp16)[name = tensor<string, []>("op_7751_cast_fp16")];
+            tensor<string, []> aw_1121_equation_0 = const()[name = tensor<string, []>("aw_1121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1121_cast_fp16 = einsum(equation = aw_1121_equation_0, values = (var_7730_cast_fp16_0, var_7708_cast_fp16_0))[name = tensor<string, []>("aw_1121_cast_fp16")];
+            tensor<string, []> aw_1123_equation_0 = const()[name = tensor<string, []>("aw_1123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1123_cast_fp16 = einsum(equation = aw_1123_equation_0, values = (var_7730_cast_fp16_1, var_7708_cast_fp16_1))[name = tensor<string, []>("aw_1123_cast_fp16")];
+            tensor<string, []> aw_1125_equation_0 = const()[name = tensor<string, []>("aw_1125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1125_cast_fp16 = einsum(equation = aw_1125_equation_0, values = (var_7730_cast_fp16_2, var_7708_cast_fp16_2))[name = tensor<string, []>("aw_1125_cast_fp16")];
+            tensor<string, []> aw_1127_equation_0 = const()[name = tensor<string, []>("aw_1127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1127_cast_fp16 = einsum(equation = aw_1127_equation_0, values = (var_7730_cast_fp16_3, var_7708_cast_fp16_3))[name = tensor<string, []>("aw_1127_cast_fp16")];
+            tensor<string, []> aw_1129_equation_0 = const()[name = tensor<string, []>("aw_1129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1129_cast_fp16 = einsum(equation = aw_1129_equation_0, values = (var_7730_cast_fp16_4, var_7708_cast_fp16_4))[name = tensor<string, []>("aw_1129_cast_fp16")];
+            tensor<string, []> aw_1131_equation_0 = const()[name = tensor<string, []>("aw_1131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1131_cast_fp16 = einsum(equation = aw_1131_equation_0, values = (var_7730_cast_fp16_5, var_7708_cast_fp16_5))[name = tensor<string, []>("aw_1131_cast_fp16")];
+            tensor<string, []> aw_1133_equation_0 = const()[name = tensor<string, []>("aw_1133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1133_cast_fp16 = einsum(equation = aw_1133_equation_0, values = (var_7730_cast_fp16_6, var_7708_cast_fp16_6))[name = tensor<string, []>("aw_1133_cast_fp16")];
+            tensor<string, []> aw_1135_equation_0 = const()[name = tensor<string, []>("aw_1135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1135_cast_fp16 = einsum(equation = aw_1135_equation_0, values = (var_7730_cast_fp16_7, var_7708_cast_fp16_7))[name = tensor<string, []>("aw_1135_cast_fp16")];
+            tensor<string, []> aw_1137_equation_0 = const()[name = tensor<string, []>("aw_1137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1137_cast_fp16 = einsum(equation = aw_1137_equation_0, values = (var_7730_cast_fp16_8, var_7708_cast_fp16_8))[name = tensor<string, []>("aw_1137_cast_fp16")];
+            tensor<string, []> aw_1139_equation_0 = const()[name = tensor<string, []>("aw_1139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1139_cast_fp16 = einsum(equation = aw_1139_equation_0, values = (var_7730_cast_fp16_9, var_7708_cast_fp16_9))[name = tensor<string, []>("aw_1139_cast_fp16")];
+            tensor<string, []> aw_1141_equation_0 = const()[name = tensor<string, []>("aw_1141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1141_cast_fp16 = einsum(equation = aw_1141_equation_0, values = (var_7730_cast_fp16_10, var_7708_cast_fp16_10))[name = tensor<string, []>("aw_1141_cast_fp16")];
+            tensor<string, []> aw_1143_equation_0 = const()[name = tensor<string, []>("aw_1143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1143_cast_fp16 = einsum(equation = aw_1143_equation_0, values = (var_7730_cast_fp16_11, var_7708_cast_fp16_11))[name = tensor<string, []>("aw_1143_cast_fp16")];
+            tensor<string, []> aw_1145_equation_0 = const()[name = tensor<string, []>("aw_1145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1145_cast_fp16 = einsum(equation = aw_1145_equation_0, values = (var_7730_cast_fp16_12, var_7708_cast_fp16_12))[name = tensor<string, []>("aw_1145_cast_fp16")];
+            tensor<string, []> aw_1147_equation_0 = const()[name = tensor<string, []>("aw_1147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1147_cast_fp16 = einsum(equation = aw_1147_equation_0, values = (var_7730_cast_fp16_13, var_7708_cast_fp16_13))[name = tensor<string, []>("aw_1147_cast_fp16")];
+            tensor<string, []> aw_1149_equation_0 = const()[name = tensor<string, []>("aw_1149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1149_cast_fp16 = einsum(equation = aw_1149_equation_0, values = (var_7730_cast_fp16_14, var_7708_cast_fp16_14))[name = tensor<string, []>("aw_1149_cast_fp16")];
+            tensor<string, []> aw_1151_equation_0 = const()[name = tensor<string, []>("aw_1151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1151_cast_fp16 = einsum(equation = aw_1151_equation_0, values = (var_7730_cast_fp16_15, var_7708_cast_fp16_15))[name = tensor<string, []>("aw_1151_cast_fp16")];
+            tensor<string, []> aw_1153_equation_0 = const()[name = tensor<string, []>("aw_1153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1153_cast_fp16 = einsum(equation = aw_1153_equation_0, values = (var_7730_cast_fp16_16, var_7708_cast_fp16_16))[name = tensor<string, []>("aw_1153_cast_fp16")];
+            tensor<string, []> aw_1155_equation_0 = const()[name = tensor<string, []>("aw_1155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1155_cast_fp16 = einsum(equation = aw_1155_equation_0, values = (var_7730_cast_fp16_17, var_7708_cast_fp16_17))[name = tensor<string, []>("aw_1155_cast_fp16")];
+            tensor<string, []> aw_1157_equation_0 = const()[name = tensor<string, []>("aw_1157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1157_cast_fp16 = einsum(equation = aw_1157_equation_0, values = (var_7730_cast_fp16_18, var_7708_cast_fp16_18))[name = tensor<string, []>("aw_1157_cast_fp16")];
+            tensor<string, []> aw_1159_equation_0 = const()[name = tensor<string, []>("aw_1159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1159_cast_fp16 = einsum(equation = aw_1159_equation_0, values = (var_7730_cast_fp16_19, var_7708_cast_fp16_19))[name = tensor<string, []>("aw_1159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7812_cast_fp16 = softmax(axis = var_7656, x = aw_1121_cast_fp16)[name = tensor<string, []>("op_7812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7813_cast_fp16 = softmax(axis = var_7656, x = aw_1123_cast_fp16)[name = tensor<string, []>("op_7813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7814_cast_fp16 = softmax(axis = var_7656, x = aw_1125_cast_fp16)[name = tensor<string, []>("op_7814_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7815_cast_fp16 = softmax(axis = var_7656, x = aw_1127_cast_fp16)[name = tensor<string, []>("op_7815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7816_cast_fp16 = softmax(axis = var_7656, x = aw_1129_cast_fp16)[name = tensor<string, []>("op_7816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7817_cast_fp16 = softmax(axis = var_7656, x = aw_1131_cast_fp16)[name = tensor<string, []>("op_7817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7818_cast_fp16 = softmax(axis = var_7656, x = aw_1133_cast_fp16)[name = tensor<string, []>("op_7818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7819_cast_fp16 = softmax(axis = var_7656, x = aw_1135_cast_fp16)[name = tensor<string, []>("op_7819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7820_cast_fp16 = softmax(axis = var_7656, x = aw_1137_cast_fp16)[name = tensor<string, []>("op_7820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7821_cast_fp16 = softmax(axis = var_7656, x = aw_1139_cast_fp16)[name = tensor<string, []>("op_7821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7822_cast_fp16 = softmax(axis = var_7656, x = aw_1141_cast_fp16)[name = tensor<string, []>("op_7822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7823_cast_fp16 = softmax(axis = var_7656, x = aw_1143_cast_fp16)[name = tensor<string, []>("op_7823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7824_cast_fp16 = softmax(axis = var_7656, x = aw_1145_cast_fp16)[name = tensor<string, []>("op_7824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7825_cast_fp16 = softmax(axis = var_7656, x = aw_1147_cast_fp16)[name = tensor<string, []>("op_7825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7826_cast_fp16 = softmax(axis = var_7656, x = aw_1149_cast_fp16)[name = tensor<string, []>("op_7826_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7827_cast_fp16 = softmax(axis = var_7656, x = aw_1151_cast_fp16)[name = tensor<string, []>("op_7827_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7828_cast_fp16 = softmax(axis = var_7656, x = aw_1153_cast_fp16)[name = tensor<string, []>("op_7828_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7829_cast_fp16 = softmax(axis = var_7656, x = aw_1155_cast_fp16)[name = tensor<string, []>("op_7829_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7830_cast_fp16 = softmax(axis = var_7656, x = aw_1157_cast_fp16)[name = tensor<string, []>("op_7830_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_7831_cast_fp16 = softmax(axis = var_7656, x = aw_1159_cast_fp16)[name = tensor<string, []>("op_7831_cast_fp16")];
+            tensor<string, []> var_7833_equation_0 = const()[name = tensor<string, []>("op_7833_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7833_cast_fp16 = einsum(equation = var_7833_equation_0, values = (var_7751_cast_fp16_0, var_7812_cast_fp16))[name = tensor<string, []>("op_7833_cast_fp16")];
+            tensor<string, []> var_7835_equation_0 = const()[name = tensor<string, []>("op_7835_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7835_cast_fp16 = einsum(equation = var_7835_equation_0, values = (var_7751_cast_fp16_1, var_7813_cast_fp16))[name = tensor<string, []>("op_7835_cast_fp16")];
+            tensor<string, []> var_7837_equation_0 = const()[name = tensor<string, []>("op_7837_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7837_cast_fp16 = einsum(equation = var_7837_equation_0, values = (var_7751_cast_fp16_2, var_7814_cast_fp16))[name = tensor<string, []>("op_7837_cast_fp16")];
+            tensor<string, []> var_7839_equation_0 = const()[name = tensor<string, []>("op_7839_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7839_cast_fp16 = einsum(equation = var_7839_equation_0, values = (var_7751_cast_fp16_3, var_7815_cast_fp16))[name = tensor<string, []>("op_7839_cast_fp16")];
+            tensor<string, []> var_7841_equation_0 = const()[name = tensor<string, []>("op_7841_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7841_cast_fp16 = einsum(equation = var_7841_equation_0, values = (var_7751_cast_fp16_4, var_7816_cast_fp16))[name = tensor<string, []>("op_7841_cast_fp16")];
+            tensor<string, []> var_7843_equation_0 = const()[name = tensor<string, []>("op_7843_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7843_cast_fp16 = einsum(equation = var_7843_equation_0, values = (var_7751_cast_fp16_5, var_7817_cast_fp16))[name = tensor<string, []>("op_7843_cast_fp16")];
+            tensor<string, []> var_7845_equation_0 = const()[name = tensor<string, []>("op_7845_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7845_cast_fp16 = einsum(equation = var_7845_equation_0, values = (var_7751_cast_fp16_6, var_7818_cast_fp16))[name = tensor<string, []>("op_7845_cast_fp16")];
+            tensor<string, []> var_7847_equation_0 = const()[name = tensor<string, []>("op_7847_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7847_cast_fp16 = einsum(equation = var_7847_equation_0, values = (var_7751_cast_fp16_7, var_7819_cast_fp16))[name = tensor<string, []>("op_7847_cast_fp16")];
+            tensor<string, []> var_7849_equation_0 = const()[name = tensor<string, []>("op_7849_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7849_cast_fp16 = einsum(equation = var_7849_equation_0, values = (var_7751_cast_fp16_8, var_7820_cast_fp16))[name = tensor<string, []>("op_7849_cast_fp16")];
+            tensor<string, []> var_7851_equation_0 = const()[name = tensor<string, []>("op_7851_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7851_cast_fp16 = einsum(equation = var_7851_equation_0, values = (var_7751_cast_fp16_9, var_7821_cast_fp16))[name = tensor<string, []>("op_7851_cast_fp16")];
+            tensor<string, []> var_7853_equation_0 = const()[name = tensor<string, []>("op_7853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7853_cast_fp16 = einsum(equation = var_7853_equation_0, values = (var_7751_cast_fp16_10, var_7822_cast_fp16))[name = tensor<string, []>("op_7853_cast_fp16")];
+            tensor<string, []> var_7855_equation_0 = const()[name = tensor<string, []>("op_7855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7855_cast_fp16 = einsum(equation = var_7855_equation_0, values = (var_7751_cast_fp16_11, var_7823_cast_fp16))[name = tensor<string, []>("op_7855_cast_fp16")];
+            tensor<string, []> var_7857_equation_0 = const()[name = tensor<string, []>("op_7857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7857_cast_fp16 = einsum(equation = var_7857_equation_0, values = (var_7751_cast_fp16_12, var_7824_cast_fp16))[name = tensor<string, []>("op_7857_cast_fp16")];
+            tensor<string, []> var_7859_equation_0 = const()[name = tensor<string, []>("op_7859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7859_cast_fp16 = einsum(equation = var_7859_equation_0, values = (var_7751_cast_fp16_13, var_7825_cast_fp16))[name = tensor<string, []>("op_7859_cast_fp16")];
+            tensor<string, []> var_7861_equation_0 = const()[name = tensor<string, []>("op_7861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7861_cast_fp16 = einsum(equation = var_7861_equation_0, values = (var_7751_cast_fp16_14, var_7826_cast_fp16))[name = tensor<string, []>("op_7861_cast_fp16")];
+            tensor<string, []> var_7863_equation_0 = const()[name = tensor<string, []>("op_7863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7863_cast_fp16 = einsum(equation = var_7863_equation_0, values = (var_7751_cast_fp16_15, var_7827_cast_fp16))[name = tensor<string, []>("op_7863_cast_fp16")];
+            tensor<string, []> var_7865_equation_0 = const()[name = tensor<string, []>("op_7865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7865_cast_fp16 = einsum(equation = var_7865_equation_0, values = (var_7751_cast_fp16_16, var_7828_cast_fp16))[name = tensor<string, []>("op_7865_cast_fp16")];
+            tensor<string, []> var_7867_equation_0 = const()[name = tensor<string, []>("op_7867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7867_cast_fp16 = einsum(equation = var_7867_equation_0, values = (var_7751_cast_fp16_17, var_7829_cast_fp16))[name = tensor<string, []>("op_7867_cast_fp16")];
+            tensor<string, []> var_7869_equation_0 = const()[name = tensor<string, []>("op_7869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7869_cast_fp16 = einsum(equation = var_7869_equation_0, values = (var_7751_cast_fp16_18, var_7830_cast_fp16))[name = tensor<string, []>("op_7869_cast_fp16")];
+            tensor<string, []> var_7871_equation_0 = const()[name = tensor<string, []>("op_7871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_7871_cast_fp16 = einsum(equation = var_7871_equation_0, values = (var_7751_cast_fp16_19, var_7831_cast_fp16))[name = tensor<string, []>("op_7871_cast_fp16")];
+            tensor<bool, []> input_285_interleave_0 = const()[name = tensor<string, []>("input_285_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_285_cast_fp16 = concat(axis = var_7656, interleave = input_285_interleave_0, values = (var_7833_cast_fp16, var_7835_cast_fp16, var_7837_cast_fp16, var_7839_cast_fp16, var_7841_cast_fp16, var_7843_cast_fp16, var_7845_cast_fp16, var_7847_cast_fp16, var_7849_cast_fp16, var_7851_cast_fp16, var_7853_cast_fp16, var_7855_cast_fp16, var_7857_cast_fp16, var_7859_cast_fp16, var_7861_cast_fp16, var_7863_cast_fp16, var_7865_cast_fp16, var_7867_cast_fp16, var_7869_cast_fp16, var_7871_cast_fp16))[name = tensor<string, []>("input_285_cast_fp16")];
+            tensor<string, []> var_7880_pad_type_0 = const()[name = tensor<string, []>("op_7880_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7880_strides_0 = const()[name = tensor<string, []>("op_7880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7880_pad_0 = const()[name = tensor<string, []>("op_7880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7880_dilations_0 = const()[name = tensor<string, []>("op_7880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7880_groups_0 = const()[name = tensor<string, []>("op_7880_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_28_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1126391872)))];
+            tensor<fp16, [1280]> blocks_28_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129668736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7880_cast_fp16 = conv(bias = blocks_28_attn_out_bias_to_fp16, dilations = var_7880_dilations_0, groups = var_7880_groups_0, pad = var_7880_pad_0, pad_type = var_7880_pad_type_0, strides = var_7880_strides_0, weight = blocks_28_attn_out_weight_to_fp16, x = input_285_cast_fp16)[name = tensor<string, []>("op_7880_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = var_7880_cast_fp16)[name = tensor<string, []>("inputs_115_cast_fp16")];
+            tensor<int32, [1]> input_287_axes_0 = const()[name = tensor<string, []>("input_287_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_287_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_287_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129671360)))];
+            tensor<fp16, [1280]> input_287_beta_0_to_fp16 = const()[name = tensor<string, []>("input_287_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129673984)))];
+            tensor<fp16, []> var_7890_to_fp16 = const()[name = tensor<string, []>("op_7890_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_287_cast_fp16 = layer_norm(axes = input_287_axes_0, beta = input_287_beta_0_to_fp16, epsilon = var_7890_to_fp16, gamma = input_287_gamma_0_to_fp16, x = inputs_115_cast_fp16)[name = tensor<string, []>("input_287_cast_fp16")];
+            tensor<string, []> input_289_pad_type_0 = const()[name = tensor<string, []>("input_289_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_289_strides_0 = const()[name = tensor<string, []>("input_289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_289_pad_0 = const()[name = tensor<string, []>("input_289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_289_dilations_0 = const()[name = tensor<string, []>("input_289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_289_groups_0 = const()[name = tensor<string, []>("input_289_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_28_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1129676608)))];
+            tensor<fp16, [5120]> blocks_28_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142783872)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_289_cast_fp16 = conv(bias = blocks_28_mlp_0_bias_to_fp16, dilations = input_289_dilations_0, groups = input_289_groups_0, pad = input_289_pad_0, pad_type = input_289_pad_type_0, strides = input_289_strides_0, weight = blocks_28_mlp_0_weight_to_fp16, x = input_287_cast_fp16)[name = tensor<string, []>("input_289_cast_fp16")];
+            tensor<string, []> input_291_mode_0 = const()[name = tensor<string, []>("input_291_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_291_cast_fp16 = gelu(mode = input_291_mode_0, x = input_289_cast_fp16)[name = tensor<string, []>("input_291_cast_fp16")];
+            tensor<string, []> var_7916_pad_type_0 = const()[name = tensor<string, []>("op_7916_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7916_strides_0 = const()[name = tensor<string, []>("op_7916_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7916_pad_0 = const()[name = tensor<string, []>("op_7916_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7916_dilations_0 = const()[name = tensor<string, []>("op_7916_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7916_groups_0 = const()[name = tensor<string, []>("op_7916_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_28_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1142794176)))];
+            tensor<fp16, [1280]> blocks_28_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_28_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155901440)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7916_cast_fp16 = conv(bias = blocks_28_mlp_2_bias_to_fp16, dilations = var_7916_dilations_0, groups = var_7916_groups_0, pad = var_7916_pad_0, pad_type = var_7916_pad_type_0, strides = var_7916_strides_0, weight = blocks_28_mlp_2_weight_to_fp16, x = input_291_cast_fp16)[name = tensor<string, []>("op_7916_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = var_7916_cast_fp16)[name = tensor<string, []>("inputs_117_cast_fp16")];
+            tensor<int32, []> var_7925 = const()[name = tensor<string, []>("op_7925"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_293_axes_0 = const()[name = tensor<string, []>("input_293_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_293_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_293_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155904064)))];
+            tensor<fp16, [1280]> input_293_beta_0_to_fp16 = const()[name = tensor<string, []>("input_293_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155906688)))];
+            tensor<fp16, []> var_7941_to_fp16 = const()[name = tensor<string, []>("op_7941_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_293_cast_fp16 = layer_norm(axes = input_293_axes_0, beta = input_293_beta_0_to_fp16, epsilon = var_7941_to_fp16, gamma = input_293_gamma_0_to_fp16, x = inputs_117_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
+            tensor<string, []> q_59_pad_type_0 = const()[name = tensor<string, []>("q_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_59_strides_0 = const()[name = tensor<string, []>("q_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_59_pad_0 = const()[name = tensor<string, []>("q_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_59_dilations_0 = const()[name = tensor<string, []>("q_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_59_groups_0 = const()[name = tensor<string, []>("q_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_7976_weight_0_to_fp16 = const()[name = tensor<string, []>("op_7976_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1155909312)))];
+            tensor<fp16, [1280]> var_7976_bias_0_to_fp16 = const()[name = tensor<string, []>("op_7976_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1159186176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7976_cast_fp16 = conv(bias = var_7976_bias_0_to_fp16, dilations = q_59_dilations_0, groups = q_59_groups_0, pad = q_59_pad_0, pad_type = q_59_pad_type_0, strides = q_59_strides_0, weight = var_7976_weight_0_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7976_cast_fp16")];
+            tensor<string, []> k_59_pad_type_0 = const()[name = tensor<string, []>("k_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_59_strides_0 = const()[name = tensor<string, []>("k_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_59_pad_0 = const()[name = tensor<string, []>("k_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_59_dilations_0 = const()[name = tensor<string, []>("k_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_59_groups_0 = const()[name = tensor<string, []>("k_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1159188800)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_59_cast_fp16 = conv(dilations = k_59_dilations_0, groups = k_59_groups_0, pad = k_59_pad_0, pad_type = k_59_pad_type_0, strides = k_59_strides_0, weight = blocks_29_attn_key_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("k_59_cast_fp16")];
+            tensor<string, []> var_7974_pad_type_0 = const()[name = tensor<string, []>("op_7974_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_7974_strides_0 = const()[name = tensor<string, []>("op_7974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_7974_pad_0 = const()[name = tensor<string, []>("op_7974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_7974_dilations_0 = const()[name = tensor<string, []>("op_7974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_7974_groups_0 = const()[name = tensor<string, []>("op_7974_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1162465664)))];
+            tensor<fp16, [1280]> blocks_29_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165742528)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_7974_cast_fp16 = conv(bias = blocks_29_attn_value_bias_to_fp16, dilations = var_7974_dilations_0, groups = var_7974_groups_0, pad = var_7974_pad_0, pad_type = var_7974_pad_type_0, strides = var_7974_strides_0, weight = blocks_29_attn_value_weight_to_fp16, x = input_293_cast_fp16)[name = tensor<string, []>("op_7974_cast_fp16")];
+            tensor<int32, [20]> tile_87 = const()[name = tensor<string, []>("tile_87"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7977_axis_0 = const()[name = tensor<string, []>("op_7977_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_7977_cast_fp16_19 = split(axis = var_7977_axis_0, split_sizes = tile_87, x = var_7976_cast_fp16)[name = tensor<string, []>("op_7977_cast_fp16")];
+            tensor<int32, [4]> var_7998_perm_0 = const()[name = tensor<string, []>("op_7998_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_88 = const()[name = tensor<string, []>("tile_88"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_7999_axis_0 = const()[name = tensor<string, []>("op_7999_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_7998_cast_fp16 = transpose(perm = var_7998_perm_0, x = k_59_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_7999_cast_fp16_19 = split(axis = var_7999_axis_0, split_sizes = tile_88, x = var_7998_cast_fp16)[name = tensor<string, []>("op_7999_cast_fp16")];
+            tensor<int32, [20]> tile_89 = const()[name = tensor<string, []>("tile_89"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8020_axis_0 = const()[name = tensor<string, []>("op_8020_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8020_cast_fp16_19 = split(axis = var_8020_axis_0, split_sizes = tile_89, x = var_7974_cast_fp16)[name = tensor<string, []>("op_8020_cast_fp16")];
+            tensor<string, []> aw_1161_equation_0 = const()[name = tensor<string, []>("aw_1161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1161_cast_fp16 = einsum(equation = aw_1161_equation_0, values = (var_7999_cast_fp16_0, var_7977_cast_fp16_0))[name = tensor<string, []>("aw_1161_cast_fp16")];
+            tensor<string, []> aw_1163_equation_0 = const()[name = tensor<string, []>("aw_1163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1163_cast_fp16 = einsum(equation = aw_1163_equation_0, values = (var_7999_cast_fp16_1, var_7977_cast_fp16_1))[name = tensor<string, []>("aw_1163_cast_fp16")];
+            tensor<string, []> aw_1165_equation_0 = const()[name = tensor<string, []>("aw_1165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1165_cast_fp16 = einsum(equation = aw_1165_equation_0, values = (var_7999_cast_fp16_2, var_7977_cast_fp16_2))[name = tensor<string, []>("aw_1165_cast_fp16")];
+            tensor<string, []> aw_1167_equation_0 = const()[name = tensor<string, []>("aw_1167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1167_cast_fp16 = einsum(equation = aw_1167_equation_0, values = (var_7999_cast_fp16_3, var_7977_cast_fp16_3))[name = tensor<string, []>("aw_1167_cast_fp16")];
+            tensor<string, []> aw_1169_equation_0 = const()[name = tensor<string, []>("aw_1169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1169_cast_fp16 = einsum(equation = aw_1169_equation_0, values = (var_7999_cast_fp16_4, var_7977_cast_fp16_4))[name = tensor<string, []>("aw_1169_cast_fp16")];
+            tensor<string, []> aw_1171_equation_0 = const()[name = tensor<string, []>("aw_1171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1171_cast_fp16 = einsum(equation = aw_1171_equation_0, values = (var_7999_cast_fp16_5, var_7977_cast_fp16_5))[name = tensor<string, []>("aw_1171_cast_fp16")];
+            tensor<string, []> aw_1173_equation_0 = const()[name = tensor<string, []>("aw_1173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1173_cast_fp16 = einsum(equation = aw_1173_equation_0, values = (var_7999_cast_fp16_6, var_7977_cast_fp16_6))[name = tensor<string, []>("aw_1173_cast_fp16")];
+            tensor<string, []> aw_1175_equation_0 = const()[name = tensor<string, []>("aw_1175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1175_cast_fp16 = einsum(equation = aw_1175_equation_0, values = (var_7999_cast_fp16_7, var_7977_cast_fp16_7))[name = tensor<string, []>("aw_1175_cast_fp16")];
+            tensor<string, []> aw_1177_equation_0 = const()[name = tensor<string, []>("aw_1177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1177_cast_fp16 = einsum(equation = aw_1177_equation_0, values = (var_7999_cast_fp16_8, var_7977_cast_fp16_8))[name = tensor<string, []>("aw_1177_cast_fp16")];
+            tensor<string, []> aw_1179_equation_0 = const()[name = tensor<string, []>("aw_1179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1179_cast_fp16 = einsum(equation = aw_1179_equation_0, values = (var_7999_cast_fp16_9, var_7977_cast_fp16_9))[name = tensor<string, []>("aw_1179_cast_fp16")];
+            tensor<string, []> aw_1181_equation_0 = const()[name = tensor<string, []>("aw_1181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1181_cast_fp16 = einsum(equation = aw_1181_equation_0, values = (var_7999_cast_fp16_10, var_7977_cast_fp16_10))[name = tensor<string, []>("aw_1181_cast_fp16")];
+            tensor<string, []> aw_1183_equation_0 = const()[name = tensor<string, []>("aw_1183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1183_cast_fp16 = einsum(equation = aw_1183_equation_0, values = (var_7999_cast_fp16_11, var_7977_cast_fp16_11))[name = tensor<string, []>("aw_1183_cast_fp16")];
+            tensor<string, []> aw_1185_equation_0 = const()[name = tensor<string, []>("aw_1185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1185_cast_fp16 = einsum(equation = aw_1185_equation_0, values = (var_7999_cast_fp16_12, var_7977_cast_fp16_12))[name = tensor<string, []>("aw_1185_cast_fp16")];
+            tensor<string, []> aw_1187_equation_0 = const()[name = tensor<string, []>("aw_1187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1187_cast_fp16 = einsum(equation = aw_1187_equation_0, values = (var_7999_cast_fp16_13, var_7977_cast_fp16_13))[name = tensor<string, []>("aw_1187_cast_fp16")];
+            tensor<string, []> aw_1189_equation_0 = const()[name = tensor<string, []>("aw_1189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1189_cast_fp16 = einsum(equation = aw_1189_equation_0, values = (var_7999_cast_fp16_14, var_7977_cast_fp16_14))[name = tensor<string, []>("aw_1189_cast_fp16")];
+            tensor<string, []> aw_1191_equation_0 = const()[name = tensor<string, []>("aw_1191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1191_cast_fp16 = einsum(equation = aw_1191_equation_0, values = (var_7999_cast_fp16_15, var_7977_cast_fp16_15))[name = tensor<string, []>("aw_1191_cast_fp16")];
+            tensor<string, []> aw_1193_equation_0 = const()[name = tensor<string, []>("aw_1193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1193_cast_fp16 = einsum(equation = aw_1193_equation_0, values = (var_7999_cast_fp16_16, var_7977_cast_fp16_16))[name = tensor<string, []>("aw_1193_cast_fp16")];
+            tensor<string, []> aw_1195_equation_0 = const()[name = tensor<string, []>("aw_1195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1195_cast_fp16 = einsum(equation = aw_1195_equation_0, values = (var_7999_cast_fp16_17, var_7977_cast_fp16_17))[name = tensor<string, []>("aw_1195_cast_fp16")];
+            tensor<string, []> aw_1197_equation_0 = const()[name = tensor<string, []>("aw_1197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1197_cast_fp16 = einsum(equation = aw_1197_equation_0, values = (var_7999_cast_fp16_18, var_7977_cast_fp16_18))[name = tensor<string, []>("aw_1197_cast_fp16")];
+            tensor<string, []> aw_1199_equation_0 = const()[name = tensor<string, []>("aw_1199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1199_cast_fp16 = einsum(equation = aw_1199_equation_0, values = (var_7999_cast_fp16_19, var_7977_cast_fp16_19))[name = tensor<string, []>("aw_1199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8081_cast_fp16 = softmax(axis = var_7925, x = aw_1161_cast_fp16)[name = tensor<string, []>("op_8081_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8082_cast_fp16 = softmax(axis = var_7925, x = aw_1163_cast_fp16)[name = tensor<string, []>("op_8082_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8083_cast_fp16 = softmax(axis = var_7925, x = aw_1165_cast_fp16)[name = tensor<string, []>("op_8083_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8084_cast_fp16 = softmax(axis = var_7925, x = aw_1167_cast_fp16)[name = tensor<string, []>("op_8084_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8085_cast_fp16 = softmax(axis = var_7925, x = aw_1169_cast_fp16)[name = tensor<string, []>("op_8085_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8086_cast_fp16 = softmax(axis = var_7925, x = aw_1171_cast_fp16)[name = tensor<string, []>("op_8086_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8087_cast_fp16 = softmax(axis = var_7925, x = aw_1173_cast_fp16)[name = tensor<string, []>("op_8087_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8088_cast_fp16 = softmax(axis = var_7925, x = aw_1175_cast_fp16)[name = tensor<string, []>("op_8088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8089_cast_fp16 = softmax(axis = var_7925, x = aw_1177_cast_fp16)[name = tensor<string, []>("op_8089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8090_cast_fp16 = softmax(axis = var_7925, x = aw_1179_cast_fp16)[name = tensor<string, []>("op_8090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8091_cast_fp16 = softmax(axis = var_7925, x = aw_1181_cast_fp16)[name = tensor<string, []>("op_8091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8092_cast_fp16 = softmax(axis = var_7925, x = aw_1183_cast_fp16)[name = tensor<string, []>("op_8092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8093_cast_fp16 = softmax(axis = var_7925, x = aw_1185_cast_fp16)[name = tensor<string, []>("op_8093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8094_cast_fp16 = softmax(axis = var_7925, x = aw_1187_cast_fp16)[name = tensor<string, []>("op_8094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8095_cast_fp16 = softmax(axis = var_7925, x = aw_1189_cast_fp16)[name = tensor<string, []>("op_8095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8096_cast_fp16 = softmax(axis = var_7925, x = aw_1191_cast_fp16)[name = tensor<string, []>("op_8096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8097_cast_fp16 = softmax(axis = var_7925, x = aw_1193_cast_fp16)[name = tensor<string, []>("op_8097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8098_cast_fp16 = softmax(axis = var_7925, x = aw_1195_cast_fp16)[name = tensor<string, []>("op_8098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8099_cast_fp16 = softmax(axis = var_7925, x = aw_1197_cast_fp16)[name = tensor<string, []>("op_8099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8100_cast_fp16 = softmax(axis = var_7925, x = aw_1199_cast_fp16)[name = tensor<string, []>("op_8100_cast_fp16")];
+            tensor<string, []> var_8102_equation_0 = const()[name = tensor<string, []>("op_8102_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8102_cast_fp16 = einsum(equation = var_8102_equation_0, values = (var_8020_cast_fp16_0, var_8081_cast_fp16))[name = tensor<string, []>("op_8102_cast_fp16")];
+            tensor<string, []> var_8104_equation_0 = const()[name = tensor<string, []>("op_8104_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8104_cast_fp16 = einsum(equation = var_8104_equation_0, values = (var_8020_cast_fp16_1, var_8082_cast_fp16))[name = tensor<string, []>("op_8104_cast_fp16")];
+            tensor<string, []> var_8106_equation_0 = const()[name = tensor<string, []>("op_8106_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8106_cast_fp16 = einsum(equation = var_8106_equation_0, values = (var_8020_cast_fp16_2, var_8083_cast_fp16))[name = tensor<string, []>("op_8106_cast_fp16")];
+            tensor<string, []> var_8108_equation_0 = const()[name = tensor<string, []>("op_8108_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8108_cast_fp16 = einsum(equation = var_8108_equation_0, values = (var_8020_cast_fp16_3, var_8084_cast_fp16))[name = tensor<string, []>("op_8108_cast_fp16")];
+            tensor<string, []> var_8110_equation_0 = const()[name = tensor<string, []>("op_8110_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8110_cast_fp16 = einsum(equation = var_8110_equation_0, values = (var_8020_cast_fp16_4, var_8085_cast_fp16))[name = tensor<string, []>("op_8110_cast_fp16")];
+            tensor<string, []> var_8112_equation_0 = const()[name = tensor<string, []>("op_8112_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8112_cast_fp16 = einsum(equation = var_8112_equation_0, values = (var_8020_cast_fp16_5, var_8086_cast_fp16))[name = tensor<string, []>("op_8112_cast_fp16")];
+            tensor<string, []> var_8114_equation_0 = const()[name = tensor<string, []>("op_8114_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8114_cast_fp16 = einsum(equation = var_8114_equation_0, values = (var_8020_cast_fp16_6, var_8087_cast_fp16))[name = tensor<string, []>("op_8114_cast_fp16")];
+            tensor<string, []> var_8116_equation_0 = const()[name = tensor<string, []>("op_8116_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8116_cast_fp16 = einsum(equation = var_8116_equation_0, values = (var_8020_cast_fp16_7, var_8088_cast_fp16))[name = tensor<string, []>("op_8116_cast_fp16")];
+            tensor<string, []> var_8118_equation_0 = const()[name = tensor<string, []>("op_8118_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8118_cast_fp16 = einsum(equation = var_8118_equation_0, values = (var_8020_cast_fp16_8, var_8089_cast_fp16))[name = tensor<string, []>("op_8118_cast_fp16")];
+            tensor<string, []> var_8120_equation_0 = const()[name = tensor<string, []>("op_8120_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8120_cast_fp16 = einsum(equation = var_8120_equation_0, values = (var_8020_cast_fp16_9, var_8090_cast_fp16))[name = tensor<string, []>("op_8120_cast_fp16")];
+            tensor<string, []> var_8122_equation_0 = const()[name = tensor<string, []>("op_8122_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8122_cast_fp16 = einsum(equation = var_8122_equation_0, values = (var_8020_cast_fp16_10, var_8091_cast_fp16))[name = tensor<string, []>("op_8122_cast_fp16")];
+            tensor<string, []> var_8124_equation_0 = const()[name = tensor<string, []>("op_8124_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8124_cast_fp16 = einsum(equation = var_8124_equation_0, values = (var_8020_cast_fp16_11, var_8092_cast_fp16))[name = tensor<string, []>("op_8124_cast_fp16")];
+            tensor<string, []> var_8126_equation_0 = const()[name = tensor<string, []>("op_8126_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8126_cast_fp16 = einsum(equation = var_8126_equation_0, values = (var_8020_cast_fp16_12, var_8093_cast_fp16))[name = tensor<string, []>("op_8126_cast_fp16")];
+            tensor<string, []> var_8128_equation_0 = const()[name = tensor<string, []>("op_8128_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8128_cast_fp16 = einsum(equation = var_8128_equation_0, values = (var_8020_cast_fp16_13, var_8094_cast_fp16))[name = tensor<string, []>("op_8128_cast_fp16")];
+            tensor<string, []> var_8130_equation_0 = const()[name = tensor<string, []>("op_8130_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8130_cast_fp16 = einsum(equation = var_8130_equation_0, values = (var_8020_cast_fp16_14, var_8095_cast_fp16))[name = tensor<string, []>("op_8130_cast_fp16")];
+            tensor<string, []> var_8132_equation_0 = const()[name = tensor<string, []>("op_8132_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8132_cast_fp16 = einsum(equation = var_8132_equation_0, values = (var_8020_cast_fp16_15, var_8096_cast_fp16))[name = tensor<string, []>("op_8132_cast_fp16")];
+            tensor<string, []> var_8134_equation_0 = const()[name = tensor<string, []>("op_8134_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8134_cast_fp16 = einsum(equation = var_8134_equation_0, values = (var_8020_cast_fp16_16, var_8097_cast_fp16))[name = tensor<string, []>("op_8134_cast_fp16")];
+            tensor<string, []> var_8136_equation_0 = const()[name = tensor<string, []>("op_8136_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8136_cast_fp16 = einsum(equation = var_8136_equation_0, values = (var_8020_cast_fp16_17, var_8098_cast_fp16))[name = tensor<string, []>("op_8136_cast_fp16")];
+            tensor<string, []> var_8138_equation_0 = const()[name = tensor<string, []>("op_8138_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8138_cast_fp16 = einsum(equation = var_8138_equation_0, values = (var_8020_cast_fp16_18, var_8099_cast_fp16))[name = tensor<string, []>("op_8138_cast_fp16")];
+            tensor<string, []> var_8140_equation_0 = const()[name = tensor<string, []>("op_8140_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8140_cast_fp16 = einsum(equation = var_8140_equation_0, values = (var_8020_cast_fp16_19, var_8100_cast_fp16))[name = tensor<string, []>("op_8140_cast_fp16")];
+            tensor<bool, []> input_295_interleave_0 = const()[name = tensor<string, []>("input_295_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_295_cast_fp16 = concat(axis = var_7925, interleave = input_295_interleave_0, values = (var_8102_cast_fp16, var_8104_cast_fp16, var_8106_cast_fp16, var_8108_cast_fp16, var_8110_cast_fp16, var_8112_cast_fp16, var_8114_cast_fp16, var_8116_cast_fp16, var_8118_cast_fp16, var_8120_cast_fp16, var_8122_cast_fp16, var_8124_cast_fp16, var_8126_cast_fp16, var_8128_cast_fp16, var_8130_cast_fp16, var_8132_cast_fp16, var_8134_cast_fp16, var_8136_cast_fp16, var_8138_cast_fp16, var_8140_cast_fp16))[name = tensor<string, []>("input_295_cast_fp16")];
+            tensor<string, []> var_8149_pad_type_0 = const()[name = tensor<string, []>("op_8149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8149_strides_0 = const()[name = tensor<string, []>("op_8149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8149_pad_0 = const()[name = tensor<string, []>("op_8149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8149_dilations_0 = const()[name = tensor<string, []>("op_8149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8149_groups_0 = const()[name = tensor<string, []>("op_8149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_29_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1165745152)))];
+            tensor<fp16, [1280]> blocks_29_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1169022016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8149_cast_fp16 = conv(bias = blocks_29_attn_out_bias_to_fp16, dilations = var_8149_dilations_0, groups = var_8149_groups_0, pad = var_8149_pad_0, pad_type = var_8149_pad_type_0, strides = var_8149_strides_0, weight = blocks_29_attn_out_weight_to_fp16, x = input_295_cast_fp16)[name = tensor<string, []>("op_8149_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = var_8149_cast_fp16)[name = tensor<string, []>("inputs_119_cast_fp16")];
+            tensor<int32, [1]> input_297_axes_0 = const()[name = tensor<string, []>("input_297_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_297_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_297_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1169024640)))];
+            tensor<fp16, [1280]> input_297_beta_0_to_fp16 = const()[name = tensor<string, []>("input_297_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1169027264)))];
+            tensor<fp16, []> var_8159_to_fp16 = const()[name = tensor<string, []>("op_8159_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_297_cast_fp16 = layer_norm(axes = input_297_axes_0, beta = input_297_beta_0_to_fp16, epsilon = var_8159_to_fp16, gamma = input_297_gamma_0_to_fp16, x = inputs_119_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
+            tensor<string, []> input_299_pad_type_0 = const()[name = tensor<string, []>("input_299_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_299_strides_0 = const()[name = tensor<string, []>("input_299_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_299_pad_0 = const()[name = tensor<string, []>("input_299_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_299_dilations_0 = const()[name = tensor<string, []>("input_299_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_299_groups_0 = const()[name = tensor<string, []>("input_299_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_29_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1169029888)))];
+            tensor<fp16, [5120]> blocks_29_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1182137152)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_299_cast_fp16 = conv(bias = blocks_29_mlp_0_bias_to_fp16, dilations = input_299_dilations_0, groups = input_299_groups_0, pad = input_299_pad_0, pad_type = input_299_pad_type_0, strides = input_299_strides_0, weight = blocks_29_mlp_0_weight_to_fp16, x = input_297_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
+            tensor<string, []> input_301_mode_0 = const()[name = tensor<string, []>("input_301_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_301_cast_fp16 = gelu(mode = input_301_mode_0, x = input_299_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
+            tensor<string, []> var_8185_pad_type_0 = const()[name = tensor<string, []>("op_8185_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8185_strides_0 = const()[name = tensor<string, []>("op_8185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8185_pad_0 = const()[name = tensor<string, []>("op_8185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8185_dilations_0 = const()[name = tensor<string, []>("op_8185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8185_groups_0 = const()[name = tensor<string, []>("op_8185_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_29_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1182147456)))];
+            tensor<fp16, [1280]> blocks_29_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_29_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1195254720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8185_cast_fp16 = conv(bias = blocks_29_mlp_2_bias_to_fp16, dilations = var_8185_dilations_0, groups = var_8185_groups_0, pad = var_8185_pad_0, pad_type = var_8185_pad_type_0, strides = var_8185_strides_0, weight = blocks_29_mlp_2_weight_to_fp16, x = input_301_cast_fp16)[name = tensor<string, []>("op_8185_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = var_8185_cast_fp16)[name = tensor<string, []>("inputs_121_cast_fp16")];
+            tensor<int32, []> var_8194 = const()[name = tensor<string, []>("op_8194"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_303_axes_0 = const()[name = tensor<string, []>("input_303_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_303_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_303_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1195257344)))];
+            tensor<fp16, [1280]> input_303_beta_0_to_fp16 = const()[name = tensor<string, []>("input_303_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1195259968)))];
+            tensor<fp16, []> var_8210_to_fp16 = const()[name = tensor<string, []>("op_8210_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_303_cast_fp16 = layer_norm(axes = input_303_axes_0, beta = input_303_beta_0_to_fp16, epsilon = var_8210_to_fp16, gamma = input_303_gamma_0_to_fp16, x = inputs_121_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
+            tensor<string, []> q_61_pad_type_0 = const()[name = tensor<string, []>("q_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_61_strides_0 = const()[name = tensor<string, []>("q_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_61_pad_0 = const()[name = tensor<string, []>("q_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_61_dilations_0 = const()[name = tensor<string, []>("q_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_61_groups_0 = const()[name = tensor<string, []>("q_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8245_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8245_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1195262592)))];
+            tensor<fp16, [1280]> var_8245_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8245_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198539456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8245_cast_fp16 = conv(bias = var_8245_bias_0_to_fp16, dilations = q_61_dilations_0, groups = q_61_groups_0, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = q_61_strides_0, weight = var_8245_weight_0_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8245_cast_fp16")];
+            tensor<string, []> k_61_pad_type_0 = const()[name = tensor<string, []>("k_61_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_61_strides_0 = const()[name = tensor<string, []>("k_61_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_61_pad_0 = const()[name = tensor<string, []>("k_61_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_61_dilations_0 = const()[name = tensor<string, []>("k_61_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_61_groups_0 = const()[name = tensor<string, []>("k_61_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1198542080)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_61_cast_fp16 = conv(dilations = k_61_dilations_0, groups = k_61_groups_0, pad = k_61_pad_0, pad_type = k_61_pad_type_0, strides = k_61_strides_0, weight = blocks_30_attn_key_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("k_61_cast_fp16")];
+            tensor<string, []> var_8243_pad_type_0 = const()[name = tensor<string, []>("op_8243_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8243_strides_0 = const()[name = tensor<string, []>("op_8243_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8243_pad_0 = const()[name = tensor<string, []>("op_8243_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8243_dilations_0 = const()[name = tensor<string, []>("op_8243_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8243_groups_0 = const()[name = tensor<string, []>("op_8243_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1201818944)))];
+            tensor<fp16, [1280]> blocks_30_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1205095808)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8243_cast_fp16 = conv(bias = blocks_30_attn_value_bias_to_fp16, dilations = var_8243_dilations_0, groups = var_8243_groups_0, pad = var_8243_pad_0, pad_type = var_8243_pad_type_0, strides = var_8243_strides_0, weight = blocks_30_attn_value_weight_to_fp16, x = input_303_cast_fp16)[name = tensor<string, []>("op_8243_cast_fp16")];
+            tensor<int32, [20]> tile_90 = const()[name = tensor<string, []>("tile_90"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8246_axis_0 = const()[name = tensor<string, []>("op_8246_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8246_cast_fp16_19 = split(axis = var_8246_axis_0, split_sizes = tile_90, x = var_8245_cast_fp16)[name = tensor<string, []>("op_8246_cast_fp16")];
+            tensor<int32, [4]> var_8267_perm_0 = const()[name = tensor<string, []>("op_8267_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_91 = const()[name = tensor<string, []>("tile_91"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8268_axis_0 = const()[name = tensor<string, []>("op_8268_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8267_cast_fp16 = transpose(perm = var_8267_perm_0, x = k_61_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8268_cast_fp16_19 = split(axis = var_8268_axis_0, split_sizes = tile_91, x = var_8267_cast_fp16)[name = tensor<string, []>("op_8268_cast_fp16")];
+            tensor<int32, [20]> tile_92 = const()[name = tensor<string, []>("tile_92"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8289_axis_0 = const()[name = tensor<string, []>("op_8289_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8289_cast_fp16_19 = split(axis = var_8289_axis_0, split_sizes = tile_92, x = var_8243_cast_fp16)[name = tensor<string, []>("op_8289_cast_fp16")];
+            tensor<string, []> aw_1201_equation_0 = const()[name = tensor<string, []>("aw_1201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1201_cast_fp16 = einsum(equation = aw_1201_equation_0, values = (var_8268_cast_fp16_0, var_8246_cast_fp16_0))[name = tensor<string, []>("aw_1201_cast_fp16")];
+            tensor<string, []> aw_1203_equation_0 = const()[name = tensor<string, []>("aw_1203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1203_cast_fp16 = einsum(equation = aw_1203_equation_0, values = (var_8268_cast_fp16_1, var_8246_cast_fp16_1))[name = tensor<string, []>("aw_1203_cast_fp16")];
+            tensor<string, []> aw_1205_equation_0 = const()[name = tensor<string, []>("aw_1205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1205_cast_fp16 = einsum(equation = aw_1205_equation_0, values = (var_8268_cast_fp16_2, var_8246_cast_fp16_2))[name = tensor<string, []>("aw_1205_cast_fp16")];
+            tensor<string, []> aw_1207_equation_0 = const()[name = tensor<string, []>("aw_1207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1207_cast_fp16 = einsum(equation = aw_1207_equation_0, values = (var_8268_cast_fp16_3, var_8246_cast_fp16_3))[name = tensor<string, []>("aw_1207_cast_fp16")];
+            tensor<string, []> aw_1209_equation_0 = const()[name = tensor<string, []>("aw_1209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1209_cast_fp16 = einsum(equation = aw_1209_equation_0, values = (var_8268_cast_fp16_4, var_8246_cast_fp16_4))[name = tensor<string, []>("aw_1209_cast_fp16")];
+            tensor<string, []> aw_1211_equation_0 = const()[name = tensor<string, []>("aw_1211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1211_cast_fp16 = einsum(equation = aw_1211_equation_0, values = (var_8268_cast_fp16_5, var_8246_cast_fp16_5))[name = tensor<string, []>("aw_1211_cast_fp16")];
+            tensor<string, []> aw_1213_equation_0 = const()[name = tensor<string, []>("aw_1213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1213_cast_fp16 = einsum(equation = aw_1213_equation_0, values = (var_8268_cast_fp16_6, var_8246_cast_fp16_6))[name = tensor<string, []>("aw_1213_cast_fp16")];
+            tensor<string, []> aw_1215_equation_0 = const()[name = tensor<string, []>("aw_1215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1215_cast_fp16 = einsum(equation = aw_1215_equation_0, values = (var_8268_cast_fp16_7, var_8246_cast_fp16_7))[name = tensor<string, []>("aw_1215_cast_fp16")];
+            tensor<string, []> aw_1217_equation_0 = const()[name = tensor<string, []>("aw_1217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1217_cast_fp16 = einsum(equation = aw_1217_equation_0, values = (var_8268_cast_fp16_8, var_8246_cast_fp16_8))[name = tensor<string, []>("aw_1217_cast_fp16")];
+            tensor<string, []> aw_1219_equation_0 = const()[name = tensor<string, []>("aw_1219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1219_cast_fp16 = einsum(equation = aw_1219_equation_0, values = (var_8268_cast_fp16_9, var_8246_cast_fp16_9))[name = tensor<string, []>("aw_1219_cast_fp16")];
+            tensor<string, []> aw_1221_equation_0 = const()[name = tensor<string, []>("aw_1221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1221_cast_fp16 = einsum(equation = aw_1221_equation_0, values = (var_8268_cast_fp16_10, var_8246_cast_fp16_10))[name = tensor<string, []>("aw_1221_cast_fp16")];
+            tensor<string, []> aw_1223_equation_0 = const()[name = tensor<string, []>("aw_1223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1223_cast_fp16 = einsum(equation = aw_1223_equation_0, values = (var_8268_cast_fp16_11, var_8246_cast_fp16_11))[name = tensor<string, []>("aw_1223_cast_fp16")];
+            tensor<string, []> aw_1225_equation_0 = const()[name = tensor<string, []>("aw_1225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1225_cast_fp16 = einsum(equation = aw_1225_equation_0, values = (var_8268_cast_fp16_12, var_8246_cast_fp16_12))[name = tensor<string, []>("aw_1225_cast_fp16")];
+            tensor<string, []> aw_1227_equation_0 = const()[name = tensor<string, []>("aw_1227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1227_cast_fp16 = einsum(equation = aw_1227_equation_0, values = (var_8268_cast_fp16_13, var_8246_cast_fp16_13))[name = tensor<string, []>("aw_1227_cast_fp16")];
+            tensor<string, []> aw_1229_equation_0 = const()[name = tensor<string, []>("aw_1229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1229_cast_fp16 = einsum(equation = aw_1229_equation_0, values = (var_8268_cast_fp16_14, var_8246_cast_fp16_14))[name = tensor<string, []>("aw_1229_cast_fp16")];
+            tensor<string, []> aw_1231_equation_0 = const()[name = tensor<string, []>("aw_1231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1231_cast_fp16 = einsum(equation = aw_1231_equation_0, values = (var_8268_cast_fp16_15, var_8246_cast_fp16_15))[name = tensor<string, []>("aw_1231_cast_fp16")];
+            tensor<string, []> aw_1233_equation_0 = const()[name = tensor<string, []>("aw_1233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1233_cast_fp16 = einsum(equation = aw_1233_equation_0, values = (var_8268_cast_fp16_16, var_8246_cast_fp16_16))[name = tensor<string, []>("aw_1233_cast_fp16")];
+            tensor<string, []> aw_1235_equation_0 = const()[name = tensor<string, []>("aw_1235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1235_cast_fp16 = einsum(equation = aw_1235_equation_0, values = (var_8268_cast_fp16_17, var_8246_cast_fp16_17))[name = tensor<string, []>("aw_1235_cast_fp16")];
+            tensor<string, []> aw_1237_equation_0 = const()[name = tensor<string, []>("aw_1237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1237_cast_fp16 = einsum(equation = aw_1237_equation_0, values = (var_8268_cast_fp16_18, var_8246_cast_fp16_18))[name = tensor<string, []>("aw_1237_cast_fp16")];
+            tensor<string, []> aw_1239_equation_0 = const()[name = tensor<string, []>("aw_1239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1239_cast_fp16 = einsum(equation = aw_1239_equation_0, values = (var_8268_cast_fp16_19, var_8246_cast_fp16_19))[name = tensor<string, []>("aw_1239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8350_cast_fp16 = softmax(axis = var_8194, x = aw_1201_cast_fp16)[name = tensor<string, []>("op_8350_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8351_cast_fp16 = softmax(axis = var_8194, x = aw_1203_cast_fp16)[name = tensor<string, []>("op_8351_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8352_cast_fp16 = softmax(axis = var_8194, x = aw_1205_cast_fp16)[name = tensor<string, []>("op_8352_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8353_cast_fp16 = softmax(axis = var_8194, x = aw_1207_cast_fp16)[name = tensor<string, []>("op_8353_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8354_cast_fp16 = softmax(axis = var_8194, x = aw_1209_cast_fp16)[name = tensor<string, []>("op_8354_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8355_cast_fp16 = softmax(axis = var_8194, x = aw_1211_cast_fp16)[name = tensor<string, []>("op_8355_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8356_cast_fp16 = softmax(axis = var_8194, x = aw_1213_cast_fp16)[name = tensor<string, []>("op_8356_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8357_cast_fp16 = softmax(axis = var_8194, x = aw_1215_cast_fp16)[name = tensor<string, []>("op_8357_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8358_cast_fp16 = softmax(axis = var_8194, x = aw_1217_cast_fp16)[name = tensor<string, []>("op_8358_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8359_cast_fp16 = softmax(axis = var_8194, x = aw_1219_cast_fp16)[name = tensor<string, []>("op_8359_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8360_cast_fp16 = softmax(axis = var_8194, x = aw_1221_cast_fp16)[name = tensor<string, []>("op_8360_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8361_cast_fp16 = softmax(axis = var_8194, x = aw_1223_cast_fp16)[name = tensor<string, []>("op_8361_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8362_cast_fp16 = softmax(axis = var_8194, x = aw_1225_cast_fp16)[name = tensor<string, []>("op_8362_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8363_cast_fp16 = softmax(axis = var_8194, x = aw_1227_cast_fp16)[name = tensor<string, []>("op_8363_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8364_cast_fp16 = softmax(axis = var_8194, x = aw_1229_cast_fp16)[name = tensor<string, []>("op_8364_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8365_cast_fp16 = softmax(axis = var_8194, x = aw_1231_cast_fp16)[name = tensor<string, []>("op_8365_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8366_cast_fp16 = softmax(axis = var_8194, x = aw_1233_cast_fp16)[name = tensor<string, []>("op_8366_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8367_cast_fp16 = softmax(axis = var_8194, x = aw_1235_cast_fp16)[name = tensor<string, []>("op_8367_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8368_cast_fp16 = softmax(axis = var_8194, x = aw_1237_cast_fp16)[name = tensor<string, []>("op_8368_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8369_cast_fp16 = softmax(axis = var_8194, x = aw_1239_cast_fp16)[name = tensor<string, []>("op_8369_cast_fp16")];
+            tensor<string, []> var_8371_equation_0 = const()[name = tensor<string, []>("op_8371_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8371_cast_fp16 = einsum(equation = var_8371_equation_0, values = (var_8289_cast_fp16_0, var_8350_cast_fp16))[name = tensor<string, []>("op_8371_cast_fp16")];
+            tensor<string, []> var_8373_equation_0 = const()[name = tensor<string, []>("op_8373_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8373_cast_fp16 = einsum(equation = var_8373_equation_0, values = (var_8289_cast_fp16_1, var_8351_cast_fp16))[name = tensor<string, []>("op_8373_cast_fp16")];
+            tensor<string, []> var_8375_equation_0 = const()[name = tensor<string, []>("op_8375_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8375_cast_fp16 = einsum(equation = var_8375_equation_0, values = (var_8289_cast_fp16_2, var_8352_cast_fp16))[name = tensor<string, []>("op_8375_cast_fp16")];
+            tensor<string, []> var_8377_equation_0 = const()[name = tensor<string, []>("op_8377_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8377_cast_fp16 = einsum(equation = var_8377_equation_0, values = (var_8289_cast_fp16_3, var_8353_cast_fp16))[name = tensor<string, []>("op_8377_cast_fp16")];
+            tensor<string, []> var_8379_equation_0 = const()[name = tensor<string, []>("op_8379_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8379_cast_fp16 = einsum(equation = var_8379_equation_0, values = (var_8289_cast_fp16_4, var_8354_cast_fp16))[name = tensor<string, []>("op_8379_cast_fp16")];
+            tensor<string, []> var_8381_equation_0 = const()[name = tensor<string, []>("op_8381_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8381_cast_fp16 = einsum(equation = var_8381_equation_0, values = (var_8289_cast_fp16_5, var_8355_cast_fp16))[name = tensor<string, []>("op_8381_cast_fp16")];
+            tensor<string, []> var_8383_equation_0 = const()[name = tensor<string, []>("op_8383_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8383_cast_fp16 = einsum(equation = var_8383_equation_0, values = (var_8289_cast_fp16_6, var_8356_cast_fp16))[name = tensor<string, []>("op_8383_cast_fp16")];
+            tensor<string, []> var_8385_equation_0 = const()[name = tensor<string, []>("op_8385_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8385_cast_fp16 = einsum(equation = var_8385_equation_0, values = (var_8289_cast_fp16_7, var_8357_cast_fp16))[name = tensor<string, []>("op_8385_cast_fp16")];
+            tensor<string, []> var_8387_equation_0 = const()[name = tensor<string, []>("op_8387_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8387_cast_fp16 = einsum(equation = var_8387_equation_0, values = (var_8289_cast_fp16_8, var_8358_cast_fp16))[name = tensor<string, []>("op_8387_cast_fp16")];
+            tensor<string, []> var_8389_equation_0 = const()[name = tensor<string, []>("op_8389_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8389_cast_fp16 = einsum(equation = var_8389_equation_0, values = (var_8289_cast_fp16_9, var_8359_cast_fp16))[name = tensor<string, []>("op_8389_cast_fp16")];
+            tensor<string, []> var_8391_equation_0 = const()[name = tensor<string, []>("op_8391_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8391_cast_fp16 = einsum(equation = var_8391_equation_0, values = (var_8289_cast_fp16_10, var_8360_cast_fp16))[name = tensor<string, []>("op_8391_cast_fp16")];
+            tensor<string, []> var_8393_equation_0 = const()[name = tensor<string, []>("op_8393_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8393_cast_fp16 = einsum(equation = var_8393_equation_0, values = (var_8289_cast_fp16_11, var_8361_cast_fp16))[name = tensor<string, []>("op_8393_cast_fp16")];
+            tensor<string, []> var_8395_equation_0 = const()[name = tensor<string, []>("op_8395_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8395_cast_fp16 = einsum(equation = var_8395_equation_0, values = (var_8289_cast_fp16_12, var_8362_cast_fp16))[name = tensor<string, []>("op_8395_cast_fp16")];
+            tensor<string, []> var_8397_equation_0 = const()[name = tensor<string, []>("op_8397_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8397_cast_fp16 = einsum(equation = var_8397_equation_0, values = (var_8289_cast_fp16_13, var_8363_cast_fp16))[name = tensor<string, []>("op_8397_cast_fp16")];
+            tensor<string, []> var_8399_equation_0 = const()[name = tensor<string, []>("op_8399_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8399_cast_fp16 = einsum(equation = var_8399_equation_0, values = (var_8289_cast_fp16_14, var_8364_cast_fp16))[name = tensor<string, []>("op_8399_cast_fp16")];
+            tensor<string, []> var_8401_equation_0 = const()[name = tensor<string, []>("op_8401_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8401_cast_fp16 = einsum(equation = var_8401_equation_0, values = (var_8289_cast_fp16_15, var_8365_cast_fp16))[name = tensor<string, []>("op_8401_cast_fp16")];
+            tensor<string, []> var_8403_equation_0 = const()[name = tensor<string, []>("op_8403_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8403_cast_fp16 = einsum(equation = var_8403_equation_0, values = (var_8289_cast_fp16_16, var_8366_cast_fp16))[name = tensor<string, []>("op_8403_cast_fp16")];
+            tensor<string, []> var_8405_equation_0 = const()[name = tensor<string, []>("op_8405_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8405_cast_fp16 = einsum(equation = var_8405_equation_0, values = (var_8289_cast_fp16_17, var_8367_cast_fp16))[name = tensor<string, []>("op_8405_cast_fp16")];
+            tensor<string, []> var_8407_equation_0 = const()[name = tensor<string, []>("op_8407_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8407_cast_fp16 = einsum(equation = var_8407_equation_0, values = (var_8289_cast_fp16_18, var_8368_cast_fp16))[name = tensor<string, []>("op_8407_cast_fp16")];
+            tensor<string, []> var_8409_equation_0 = const()[name = tensor<string, []>("op_8409_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8409_cast_fp16 = einsum(equation = var_8409_equation_0, values = (var_8289_cast_fp16_19, var_8369_cast_fp16))[name = tensor<string, []>("op_8409_cast_fp16")];
+            tensor<bool, []> input_305_interleave_0 = const()[name = tensor<string, []>("input_305_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_305_cast_fp16 = concat(axis = var_8194, interleave = input_305_interleave_0, values = (var_8371_cast_fp16, var_8373_cast_fp16, var_8375_cast_fp16, var_8377_cast_fp16, var_8379_cast_fp16, var_8381_cast_fp16, var_8383_cast_fp16, var_8385_cast_fp16, var_8387_cast_fp16, var_8389_cast_fp16, var_8391_cast_fp16, var_8393_cast_fp16, var_8395_cast_fp16, var_8397_cast_fp16, var_8399_cast_fp16, var_8401_cast_fp16, var_8403_cast_fp16, var_8405_cast_fp16, var_8407_cast_fp16, var_8409_cast_fp16))[name = tensor<string, []>("input_305_cast_fp16")];
+            tensor<string, []> var_8418_pad_type_0 = const()[name = tensor<string, []>("op_8418_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8418_strides_0 = const()[name = tensor<string, []>("op_8418_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8418_pad_0 = const()[name = tensor<string, []>("op_8418_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8418_dilations_0 = const()[name = tensor<string, []>("op_8418_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8418_groups_0 = const()[name = tensor<string, []>("op_8418_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_30_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1205098432)))];
+            tensor<fp16, [1280]> blocks_30_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208375296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8418_cast_fp16 = conv(bias = blocks_30_attn_out_bias_to_fp16, dilations = var_8418_dilations_0, groups = var_8418_groups_0, pad = var_8418_pad_0, pad_type = var_8418_pad_type_0, strides = var_8418_strides_0, weight = blocks_30_attn_out_weight_to_fp16, x = input_305_cast_fp16)[name = tensor<string, []>("op_8418_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = var_8418_cast_fp16)[name = tensor<string, []>("inputs_123_cast_fp16")];
+            tensor<int32, [1]> input_307_axes_0 = const()[name = tensor<string, []>("input_307_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_307_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_307_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208377920)))];
+            tensor<fp16, [1280]> input_307_beta_0_to_fp16 = const()[name = tensor<string, []>("input_307_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208380544)))];
+            tensor<fp16, []> var_8428_to_fp16 = const()[name = tensor<string, []>("op_8428_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_307_cast_fp16 = layer_norm(axes = input_307_axes_0, beta = input_307_beta_0_to_fp16, epsilon = var_8428_to_fp16, gamma = input_307_gamma_0_to_fp16, x = inputs_123_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
+            tensor<string, []> input_309_pad_type_0 = const()[name = tensor<string, []>("input_309_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_309_strides_0 = const()[name = tensor<string, []>("input_309_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_309_pad_0 = const()[name = tensor<string, []>("input_309_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_309_dilations_0 = const()[name = tensor<string, []>("input_309_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_309_groups_0 = const()[name = tensor<string, []>("input_309_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_30_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1208383168)))];
+            tensor<fp16, [5120]> blocks_30_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221490432)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_309_cast_fp16 = conv(bias = blocks_30_mlp_0_bias_to_fp16, dilations = input_309_dilations_0, groups = input_309_groups_0, pad = input_309_pad_0, pad_type = input_309_pad_type_0, strides = input_309_strides_0, weight = blocks_30_mlp_0_weight_to_fp16, x = input_307_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
+            tensor<string, []> input_311_mode_0 = const()[name = tensor<string, []>("input_311_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_311_cast_fp16 = gelu(mode = input_311_mode_0, x = input_309_cast_fp16)[name = tensor<string, []>("input_311_cast_fp16")];
+            tensor<string, []> var_8454_pad_type_0 = const()[name = tensor<string, []>("op_8454_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8454_strides_0 = const()[name = tensor<string, []>("op_8454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8454_pad_0 = const()[name = tensor<string, []>("op_8454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8454_dilations_0 = const()[name = tensor<string, []>("op_8454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8454_groups_0 = const()[name = tensor<string, []>("op_8454_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_30_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1221500736)))];
+            tensor<fp16, [1280]> blocks_30_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_30_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234608000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8454_cast_fp16 = conv(bias = blocks_30_mlp_2_bias_to_fp16, dilations = var_8454_dilations_0, groups = var_8454_groups_0, pad = var_8454_pad_0, pad_type = var_8454_pad_type_0, strides = var_8454_strides_0, weight = blocks_30_mlp_2_weight_to_fp16, x = input_311_cast_fp16)[name = tensor<string, []>("op_8454_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = var_8454_cast_fp16)[name = tensor<string, []>("inputs_125_cast_fp16")];
+            tensor<int32, []> var_8463 = const()[name = tensor<string, []>("op_8463"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_313_axes_0 = const()[name = tensor<string, []>("input_313_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_313_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_313_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234610624)))];
+            tensor<fp16, [1280]> input_313_beta_0_to_fp16 = const()[name = tensor<string, []>("input_313_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234613248)))];
+            tensor<fp16, []> var_8479_to_fp16 = const()[name = tensor<string, []>("op_8479_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_313_cast_fp16 = layer_norm(axes = input_313_axes_0, beta = input_313_beta_0_to_fp16, epsilon = var_8479_to_fp16, gamma = input_313_gamma_0_to_fp16, x = inputs_125_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> var_8514_weight_0_to_fp16 = const()[name = tensor<string, []>("op_8514_weight_0_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1234615872)))];
+            tensor<fp16, [1280]> var_8514_bias_0_to_fp16 = const()[name = tensor<string, []>("op_8514_bias_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237892736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8514_cast_fp16 = conv(bias = var_8514_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_8514_weight_0_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8514_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_key_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1237895360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_31_attn_key_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_8512_pad_type_0 = const()[name = tensor<string, []>("op_8512_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8512_strides_0 = const()[name = tensor<string, []>("op_8512_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8512_pad_0 = const()[name = tensor<string, []>("op_8512_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8512_dilations_0 = const()[name = tensor<string, []>("op_8512_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8512_groups_0 = const()[name = tensor<string, []>("op_8512_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1241172224)))];
+            tensor<fp16, [1280]> blocks_31_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_value_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244449088)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8512_cast_fp16 = conv(bias = blocks_31_attn_value_bias_to_fp16, dilations = var_8512_dilations_0, groups = var_8512_groups_0, pad = var_8512_pad_0, pad_type = var_8512_pad_type_0, strides = var_8512_strides_0, weight = blocks_31_attn_value_weight_to_fp16, x = input_313_cast_fp16)[name = tensor<string, []>("op_8512_cast_fp16")];
+            tensor<int32, [20]> tile_93 = const()[name = tensor<string, []>("tile_93"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8515_axis_0 = const()[name = tensor<string, []>("op_8515_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8515_cast_fp16_19 = split(axis = var_8515_axis_0, split_sizes = tile_93, x = var_8514_cast_fp16)[name = tensor<string, []>("op_8515_cast_fp16")];
+            tensor<int32, [4]> var_8536_perm_0 = const()[name = tensor<string, []>("op_8536_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [20]> tile_94 = const()[name = tensor<string, []>("tile_94"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8537_axis_0 = const()[name = tensor<string, []>("op_8537_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1280]> var_8536_cast_fp16 = transpose(perm = var_8536_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_15, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_16, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_17, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_18, tensor<fp16, [1, 1500, 1, 64]> var_8537_cast_fp16_19 = split(axis = var_8537_axis_0, split_sizes = tile_94, x = var_8536_cast_fp16)[name = tensor<string, []>("op_8537_cast_fp16")];
+            tensor<int32, [20]> tile_95 = const()[name = tensor<string, []>("tile_95"), val = tensor<int32, [20]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_8558_axis_0 = const()[name = tensor<string, []>("op_8558_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_15, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_16, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_17, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_18, tensor<fp16, [1, 64, 1, 1500]> var_8558_cast_fp16_19 = split(axis = var_8558_axis_0, split_sizes = tile_95, x = var_8512_cast_fp16)[name = tensor<string, []>("op_8558_cast_fp16")];
+            tensor<string, []> aw_1241_equation_0 = const()[name = tensor<string, []>("aw_1241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1241_cast_fp16 = einsum(equation = aw_1241_equation_0, values = (var_8537_cast_fp16_0, var_8515_cast_fp16_0))[name = tensor<string, []>("aw_1241_cast_fp16")];
+            tensor<string, []> aw_1243_equation_0 = const()[name = tensor<string, []>("aw_1243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1243_cast_fp16 = einsum(equation = aw_1243_equation_0, values = (var_8537_cast_fp16_1, var_8515_cast_fp16_1))[name = tensor<string, []>("aw_1243_cast_fp16")];
+            tensor<string, []> aw_1245_equation_0 = const()[name = tensor<string, []>("aw_1245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1245_cast_fp16 = einsum(equation = aw_1245_equation_0, values = (var_8537_cast_fp16_2, var_8515_cast_fp16_2))[name = tensor<string, []>("aw_1245_cast_fp16")];
+            tensor<string, []> aw_1247_equation_0 = const()[name = tensor<string, []>("aw_1247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1247_cast_fp16 = einsum(equation = aw_1247_equation_0, values = (var_8537_cast_fp16_3, var_8515_cast_fp16_3))[name = tensor<string, []>("aw_1247_cast_fp16")];
+            tensor<string, []> aw_1249_equation_0 = const()[name = tensor<string, []>("aw_1249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1249_cast_fp16 = einsum(equation = aw_1249_equation_0, values = (var_8537_cast_fp16_4, var_8515_cast_fp16_4))[name = tensor<string, []>("aw_1249_cast_fp16")];
+            tensor<string, []> aw_1251_equation_0 = const()[name = tensor<string, []>("aw_1251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1251_cast_fp16 = einsum(equation = aw_1251_equation_0, values = (var_8537_cast_fp16_5, var_8515_cast_fp16_5))[name = tensor<string, []>("aw_1251_cast_fp16")];
+            tensor<string, []> aw_1253_equation_0 = const()[name = tensor<string, []>("aw_1253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1253_cast_fp16 = einsum(equation = aw_1253_equation_0, values = (var_8537_cast_fp16_6, var_8515_cast_fp16_6))[name = tensor<string, []>("aw_1253_cast_fp16")];
+            tensor<string, []> aw_1255_equation_0 = const()[name = tensor<string, []>("aw_1255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1255_cast_fp16 = einsum(equation = aw_1255_equation_0, values = (var_8537_cast_fp16_7, var_8515_cast_fp16_7))[name = tensor<string, []>("aw_1255_cast_fp16")];
+            tensor<string, []> aw_1257_equation_0 = const()[name = tensor<string, []>("aw_1257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1257_cast_fp16 = einsum(equation = aw_1257_equation_0, values = (var_8537_cast_fp16_8, var_8515_cast_fp16_8))[name = tensor<string, []>("aw_1257_cast_fp16")];
+            tensor<string, []> aw_1259_equation_0 = const()[name = tensor<string, []>("aw_1259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1259_cast_fp16 = einsum(equation = aw_1259_equation_0, values = (var_8537_cast_fp16_9, var_8515_cast_fp16_9))[name = tensor<string, []>("aw_1259_cast_fp16")];
+            tensor<string, []> aw_1261_equation_0 = const()[name = tensor<string, []>("aw_1261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1261_cast_fp16 = einsum(equation = aw_1261_equation_0, values = (var_8537_cast_fp16_10, var_8515_cast_fp16_10))[name = tensor<string, []>("aw_1261_cast_fp16")];
+            tensor<string, []> aw_1263_equation_0 = const()[name = tensor<string, []>("aw_1263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1263_cast_fp16 = einsum(equation = aw_1263_equation_0, values = (var_8537_cast_fp16_11, var_8515_cast_fp16_11))[name = tensor<string, []>("aw_1263_cast_fp16")];
+            tensor<string, []> aw_1265_equation_0 = const()[name = tensor<string, []>("aw_1265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1265_cast_fp16 = einsum(equation = aw_1265_equation_0, values = (var_8537_cast_fp16_12, var_8515_cast_fp16_12))[name = tensor<string, []>("aw_1265_cast_fp16")];
+            tensor<string, []> aw_1267_equation_0 = const()[name = tensor<string, []>("aw_1267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1267_cast_fp16 = einsum(equation = aw_1267_equation_0, values = (var_8537_cast_fp16_13, var_8515_cast_fp16_13))[name = tensor<string, []>("aw_1267_cast_fp16")];
+            tensor<string, []> aw_1269_equation_0 = const()[name = tensor<string, []>("aw_1269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1269_cast_fp16 = einsum(equation = aw_1269_equation_0, values = (var_8537_cast_fp16_14, var_8515_cast_fp16_14))[name = tensor<string, []>("aw_1269_cast_fp16")];
+            tensor<string, []> aw_1271_equation_0 = const()[name = tensor<string, []>("aw_1271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1271_cast_fp16 = einsum(equation = aw_1271_equation_0, values = (var_8537_cast_fp16_15, var_8515_cast_fp16_15))[name = tensor<string, []>("aw_1271_cast_fp16")];
+            tensor<string, []> aw_1273_equation_0 = const()[name = tensor<string, []>("aw_1273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1273_cast_fp16 = einsum(equation = aw_1273_equation_0, values = (var_8537_cast_fp16_16, var_8515_cast_fp16_16))[name = tensor<string, []>("aw_1273_cast_fp16")];
+            tensor<string, []> aw_1275_equation_0 = const()[name = tensor<string, []>("aw_1275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1275_cast_fp16 = einsum(equation = aw_1275_equation_0, values = (var_8537_cast_fp16_17, var_8515_cast_fp16_17))[name = tensor<string, []>("aw_1275_cast_fp16")];
+            tensor<string, []> aw_1277_equation_0 = const()[name = tensor<string, []>("aw_1277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1277_cast_fp16 = einsum(equation = aw_1277_equation_0, values = (var_8537_cast_fp16_18, var_8515_cast_fp16_18))[name = tensor<string, []>("aw_1277_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_8537_cast_fp16_19, var_8515_cast_fp16_19))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8619_cast_fp16 = softmax(axis = var_8463, x = aw_1241_cast_fp16)[name = tensor<string, []>("op_8619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8620_cast_fp16 = softmax(axis = var_8463, x = aw_1243_cast_fp16)[name = tensor<string, []>("op_8620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8621_cast_fp16 = softmax(axis = var_8463, x = aw_1245_cast_fp16)[name = tensor<string, []>("op_8621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8622_cast_fp16 = softmax(axis = var_8463, x = aw_1247_cast_fp16)[name = tensor<string, []>("op_8622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8623_cast_fp16 = softmax(axis = var_8463, x = aw_1249_cast_fp16)[name = tensor<string, []>("op_8623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8624_cast_fp16 = softmax(axis = var_8463, x = aw_1251_cast_fp16)[name = tensor<string, []>("op_8624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8625_cast_fp16 = softmax(axis = var_8463, x = aw_1253_cast_fp16)[name = tensor<string, []>("op_8625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8626_cast_fp16 = softmax(axis = var_8463, x = aw_1255_cast_fp16)[name = tensor<string, []>("op_8626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8627_cast_fp16 = softmax(axis = var_8463, x = aw_1257_cast_fp16)[name = tensor<string, []>("op_8627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8628_cast_fp16 = softmax(axis = var_8463, x = aw_1259_cast_fp16)[name = tensor<string, []>("op_8628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8629_cast_fp16 = softmax(axis = var_8463, x = aw_1261_cast_fp16)[name = tensor<string, []>("op_8629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8630_cast_fp16 = softmax(axis = var_8463, x = aw_1263_cast_fp16)[name = tensor<string, []>("op_8630_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8631_cast_fp16 = softmax(axis = var_8463, x = aw_1265_cast_fp16)[name = tensor<string, []>("op_8631_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8632_cast_fp16 = softmax(axis = var_8463, x = aw_1267_cast_fp16)[name = tensor<string, []>("op_8632_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8633_cast_fp16 = softmax(axis = var_8463, x = aw_1269_cast_fp16)[name = tensor<string, []>("op_8633_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8634_cast_fp16 = softmax(axis = var_8463, x = aw_1271_cast_fp16)[name = tensor<string, []>("op_8634_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8635_cast_fp16 = softmax(axis = var_8463, x = aw_1273_cast_fp16)[name = tensor<string, []>("op_8635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8636_cast_fp16 = softmax(axis = var_8463, x = aw_1275_cast_fp16)[name = tensor<string, []>("op_8636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8637_cast_fp16 = softmax(axis = var_8463, x = aw_1277_cast_fp16)[name = tensor<string, []>("op_8637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_8638_cast_fp16 = softmax(axis = var_8463, x = aw_cast_fp16)[name = tensor<string, []>("op_8638_cast_fp16")];
+            tensor<string, []> var_8640_equation_0 = const()[name = tensor<string, []>("op_8640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8640_cast_fp16 = einsum(equation = var_8640_equation_0, values = (var_8558_cast_fp16_0, var_8619_cast_fp16))[name = tensor<string, []>("op_8640_cast_fp16")];
+            tensor<string, []> var_8642_equation_0 = const()[name = tensor<string, []>("op_8642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8642_cast_fp16 = einsum(equation = var_8642_equation_0, values = (var_8558_cast_fp16_1, var_8620_cast_fp16))[name = tensor<string, []>("op_8642_cast_fp16")];
+            tensor<string, []> var_8644_equation_0 = const()[name = tensor<string, []>("op_8644_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8644_cast_fp16 = einsum(equation = var_8644_equation_0, values = (var_8558_cast_fp16_2, var_8621_cast_fp16))[name = tensor<string, []>("op_8644_cast_fp16")];
+            tensor<string, []> var_8646_equation_0 = const()[name = tensor<string, []>("op_8646_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8646_cast_fp16 = einsum(equation = var_8646_equation_0, values = (var_8558_cast_fp16_3, var_8622_cast_fp16))[name = tensor<string, []>("op_8646_cast_fp16")];
+            tensor<string, []> var_8648_equation_0 = const()[name = tensor<string, []>("op_8648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8648_cast_fp16 = einsum(equation = var_8648_equation_0, values = (var_8558_cast_fp16_4, var_8623_cast_fp16))[name = tensor<string, []>("op_8648_cast_fp16")];
+            tensor<string, []> var_8650_equation_0 = const()[name = tensor<string, []>("op_8650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8650_cast_fp16 = einsum(equation = var_8650_equation_0, values = (var_8558_cast_fp16_5, var_8624_cast_fp16))[name = tensor<string, []>("op_8650_cast_fp16")];
+            tensor<string, []> var_8652_equation_0 = const()[name = tensor<string, []>("op_8652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8652_cast_fp16 = einsum(equation = var_8652_equation_0, values = (var_8558_cast_fp16_6, var_8625_cast_fp16))[name = tensor<string, []>("op_8652_cast_fp16")];
+            tensor<string, []> var_8654_equation_0 = const()[name = tensor<string, []>("op_8654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8654_cast_fp16 = einsum(equation = var_8654_equation_0, values = (var_8558_cast_fp16_7, var_8626_cast_fp16))[name = tensor<string, []>("op_8654_cast_fp16")];
+            tensor<string, []> var_8656_equation_0 = const()[name = tensor<string, []>("op_8656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8656_cast_fp16 = einsum(equation = var_8656_equation_0, values = (var_8558_cast_fp16_8, var_8627_cast_fp16))[name = tensor<string, []>("op_8656_cast_fp16")];
+            tensor<string, []> var_8658_equation_0 = const()[name = tensor<string, []>("op_8658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8658_cast_fp16 = einsum(equation = var_8658_equation_0, values = (var_8558_cast_fp16_9, var_8628_cast_fp16))[name = tensor<string, []>("op_8658_cast_fp16")];
+            tensor<string, []> var_8660_equation_0 = const()[name = tensor<string, []>("op_8660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8660_cast_fp16 = einsum(equation = var_8660_equation_0, values = (var_8558_cast_fp16_10, var_8629_cast_fp16))[name = tensor<string, []>("op_8660_cast_fp16")];
+            tensor<string, []> var_8662_equation_0 = const()[name = tensor<string, []>("op_8662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8662_cast_fp16 = einsum(equation = var_8662_equation_0, values = (var_8558_cast_fp16_11, var_8630_cast_fp16))[name = tensor<string, []>("op_8662_cast_fp16")];
+            tensor<string, []> var_8664_equation_0 = const()[name = tensor<string, []>("op_8664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8664_cast_fp16 = einsum(equation = var_8664_equation_0, values = (var_8558_cast_fp16_12, var_8631_cast_fp16))[name = tensor<string, []>("op_8664_cast_fp16")];
+            tensor<string, []> var_8666_equation_0 = const()[name = tensor<string, []>("op_8666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8666_cast_fp16 = einsum(equation = var_8666_equation_0, values = (var_8558_cast_fp16_13, var_8632_cast_fp16))[name = tensor<string, []>("op_8666_cast_fp16")];
+            tensor<string, []> var_8668_equation_0 = const()[name = tensor<string, []>("op_8668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8668_cast_fp16 = einsum(equation = var_8668_equation_0, values = (var_8558_cast_fp16_14, var_8633_cast_fp16))[name = tensor<string, []>("op_8668_cast_fp16")];
+            tensor<string, []> var_8670_equation_0 = const()[name = tensor<string, []>("op_8670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8670_cast_fp16 = einsum(equation = var_8670_equation_0, values = (var_8558_cast_fp16_15, var_8634_cast_fp16))[name = tensor<string, []>("op_8670_cast_fp16")];
+            tensor<string, []> var_8672_equation_0 = const()[name = tensor<string, []>("op_8672_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8672_cast_fp16 = einsum(equation = var_8672_equation_0, values = (var_8558_cast_fp16_16, var_8635_cast_fp16))[name = tensor<string, []>("op_8672_cast_fp16")];
+            tensor<string, []> var_8674_equation_0 = const()[name = tensor<string, []>("op_8674_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8674_cast_fp16 = einsum(equation = var_8674_equation_0, values = (var_8558_cast_fp16_17, var_8636_cast_fp16))[name = tensor<string, []>("op_8674_cast_fp16")];
+            tensor<string, []> var_8676_equation_0 = const()[name = tensor<string, []>("op_8676_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8676_cast_fp16 = einsum(equation = var_8676_equation_0, values = (var_8558_cast_fp16_18, var_8637_cast_fp16))[name = tensor<string, []>("op_8676_cast_fp16")];
+            tensor<string, []> var_8678_equation_0 = const()[name = tensor<string, []>("op_8678_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_8678_cast_fp16 = einsum(equation = var_8678_equation_0, values = (var_8558_cast_fp16_19, var_8638_cast_fp16))[name = tensor<string, []>("op_8678_cast_fp16")];
+            tensor<bool, []> input_315_interleave_0 = const()[name = tensor<string, []>("input_315_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_315_cast_fp16 = concat(axis = var_8463, interleave = input_315_interleave_0, values = (var_8640_cast_fp16, var_8642_cast_fp16, var_8644_cast_fp16, var_8646_cast_fp16, var_8648_cast_fp16, var_8650_cast_fp16, var_8652_cast_fp16, var_8654_cast_fp16, var_8656_cast_fp16, var_8658_cast_fp16, var_8660_cast_fp16, var_8662_cast_fp16, var_8664_cast_fp16, var_8666_cast_fp16, var_8668_cast_fp16, var_8670_cast_fp16, var_8672_cast_fp16, var_8674_cast_fp16, var_8676_cast_fp16, var_8678_cast_fp16))[name = tensor<string, []>("input_315_cast_fp16")];
+            tensor<string, []> var_8687_pad_type_0 = const()[name = tensor<string, []>("op_8687_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8687_strides_0 = const()[name = tensor<string, []>("op_8687_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8687_pad_0 = const()[name = tensor<string, []>("op_8687_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8687_dilations_0 = const()[name = tensor<string, []>("op_8687_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8687_groups_0 = const()[name = tensor<string, []>("op_8687_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> blocks_31_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1244451712)))];
+            tensor<fp16, [1280]> blocks_31_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_attn_out_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247728576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8687_cast_fp16 = conv(bias = blocks_31_attn_out_bias_to_fp16, dilations = var_8687_dilations_0, groups = var_8687_groups_0, pad = var_8687_pad_0, pad_type = var_8687_pad_type_0, strides = var_8687_strides_0, weight = blocks_31_attn_out_weight_to_fp16, x = input_315_cast_fp16)[name = tensor<string, []>("op_8687_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = var_8687_cast_fp16)[name = tensor<string, []>("inputs_127_cast_fp16")];
+            tensor<int32, [1]> input_317_axes_0 = const()[name = tensor<string, []>("input_317_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> input_317_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_317_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247731200)))];
+            tensor<fp16, [1280]> input_317_beta_0_to_fp16 = const()[name = tensor<string, []>("input_317_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247733824)))];
+            tensor<fp16, []> var_8697_to_fp16 = const()[name = tensor<string, []>("op_8697_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_317_cast_fp16 = layer_norm(axes = input_317_axes_0, beta = input_317_beta_0_to_fp16, epsilon = var_8697_to_fp16, gamma = input_317_gamma_0_to_fp16, x = inputs_127_cast_fp16)[name = tensor<string, []>("input_317_cast_fp16")];
+            tensor<string, []> input_319_pad_type_0 = const()[name = tensor<string, []>("input_319_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_319_strides_0 = const()[name = tensor<string, []>("input_319_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_319_pad_0 = const()[name = tensor<string, []>("input_319_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_319_dilations_0 = const()[name = tensor<string, []>("input_319_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_319_groups_0 = const()[name = tensor<string, []>("input_319_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> blocks_31_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1247736448)))];
+            tensor<fp16, [5120]> blocks_31_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_0_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260843712)))];
+            tensor<fp16, [1, 5120, 1, 1500]> input_319_cast_fp16 = conv(bias = blocks_31_mlp_0_bias_to_fp16, dilations = input_319_dilations_0, groups = input_319_groups_0, pad = input_319_pad_0, pad_type = input_319_pad_type_0, strides = input_319_strides_0, weight = blocks_31_mlp_0_weight_to_fp16, x = input_317_cast_fp16)[name = tensor<string, []>("input_319_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_319_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_8723_pad_type_0 = const()[name = tensor<string, []>("op_8723_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_8723_strides_0 = const()[name = tensor<string, []>("op_8723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_8723_pad_0 = const()[name = tensor<string, []>("op_8723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_8723_dilations_0 = const()[name = tensor<string, []>("op_8723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_8723_groups_0 = const()[name = tensor<string, []>("op_8723_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> blocks_31_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1260854016)))];
+            tensor<fp16, [1280]> blocks_31_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_31_mlp_2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273961280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_8723_cast_fp16 = conv(bias = blocks_31_mlp_2_bias_to_fp16, dilations = var_8723_dilations_0, groups = var_8723_groups_0, pad = var_8723_pad_0, pad_type = var_8723_pad_type_0, strides = var_8723_strides_0, weight = blocks_31_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_8723_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = var_8723_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1280]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273963904)))];
+            tensor<fp16, [1280]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1273966528)))];
+            tensor<fp16, []> var_8737_to_fp16 = const()[name = tensor<string, []>("op_8737_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_8737_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_8748_axes_0 = const()[name = tensor<string, []>("op_8748_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1500]> var_8748_cast_fp16 = squeeze(axes = var_8748_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_8748_cast_fp16")];
+            tensor<int32, [3]> var_8751_perm_0 = const()[name = tensor<string, []>("op_8751_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_8751_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_8751_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 1280]> var_8751_cast_fp16 = transpose(perm = var_8751_perm_0, x = var_8748_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 1280]> output = cast(dtype = var_8751_cast_fp16_to_fp32_dtype_0, x = var_8751_cast_fp16)[name = tensor<string, []>("cast_131")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/large-v3/ggml-large-v3-encoder.mlmodelc/weights/weight.bin b/large-v3/ggml-large-v3-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d3c319e2907a1d272802edc0414226f7dd4697fd
--- /dev/null
+++ b/large-v3/ggml-large-v3-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:806bd7aef5df068fea795e1af8a671cf8817f42f5179e3624632bfbbcbad869f
+size 1273969152
diff --git a/large-v3/ggml-large-v3.bin b/large-v3/ggml-large-v3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..30488f6b9eeae93e026c978ac7a3190274732ea2
--- /dev/null
+++ b/large-v3/ggml-large-v3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2
+size 3095033483
diff --git a/medium.en/.DS_Store b/medium.en/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..b8941f18e809afaa65bda9ee38ee3e1f58bc17e0
Binary files /dev/null and b/medium.en/.DS_Store differ
diff --git a/medium.en/ggml-medium.en-encoder.mlmodelc/analytics/coremldata.bin b/medium.en/ggml-medium.en-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..328e5c6707519542579df61365ef4c4391c16353
--- /dev/null
+++ b/medium.en/ggml-medium.en-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:423209035d398f1e6945478cc49cf8e83022a3dc4e00d735c54289b3f62cdf89
+size 243
diff --git a/medium.en/ggml-medium.en-encoder.mlmodelc/coremldata.bin b/medium.en/ggml-medium.en-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7e2b54475c4dbc86435a318e0608d48945bae16
--- /dev/null
+++ b/medium.en/ggml-medium.en-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d20a4fe17a031efc213c5c295df6967c6e87eba9cca3f07fa63c2beb835ca420
+size 320
diff --git a/medium.en/ggml-medium.en-encoder.mlmodelc/metadata.json b/medium.en/ggml-medium.en-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d641992607e37ca4cd71dc6a55c6bba8dd73f78b
--- /dev/null
+++ b/medium.en/ggml-medium.en-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1024]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 24,
+      "Gelu" : 26,
+      "LayerNorm" : 49,
+      "Transpose" : 25,
+      "Softmax" : 384,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 49,
+      "Einsum" : 768,
+      "ExpandDims" : 1,
+      "Split" : 72,
+      "Conv" : 146
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.2",
+      "com.github.apple.coremltools.version" : "8.3.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_medium_en",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/medium.en/ggml-medium.en-encoder.mlmodelc/model.mil b/medium.en/ggml-medium.en-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..e6dfb1231cc22b9b4af2039b332f526ac8800584
--- /dev/null
+++ b/medium.en/ggml-medium.en-encoder.mlmodelc/model.mil
@@ -0,0 +1,3763 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_68_pad_type_0 = const()[name = tensor<string, []>("op_68_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_68_pad_0 = const()[name = tensor<string, []>("op_68_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_68_strides_0 = const()[name = tensor<string, []>("op_68_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_68_dilations_0 = const()[name = tensor<string, []>("op_68_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_68_groups_0 = const()[name = tensor<string, []>("op_68_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1024, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [1024, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1024]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(491648)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_100")];
+            tensor<fp16, [1, 1024, 3000]> var_68_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_68_dilations_0, groups = var_68_groups_0, pad = var_68_pad_0, pad_type = var_68_pad_type_0, strides = var_68_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_68_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1024, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_68_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_86_pad_type_0 = const()[name = tensor<string, []>("op_86_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_86_pad_0 = const()[name = tensor<string, []>("op_86_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_86_strides_0 = const()[name = tensor<string, []>("op_86_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_86_dilations_0 = const()[name = tensor<string, []>("op_86_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_86_groups_0 = const()[name = tensor<string, []>("op_86_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1024, 1024, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(493760)))];
+            tensor<fp16, [1024]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6785280)))];
+            tensor<fp16, [1, 1024, 1500]> var_86_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_86_dilations_0, groups = var_86_groups_0, pad = var_86_pad_0, pad_type = var_86_pad_type_0, strides = var_86_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_86_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1024, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_86_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [1024, 1500]> var_91_to_fp16 = const()[name = tensor<string, []>("op_91_to_fp16"), val = tensor<fp16, [1024, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6787392)))];
+            tensor<fp16, [1, 1024, 1500]> var_93_cast_fp16 = add(x = x_3_cast_fp16, y = var_91_to_fp16)[name = tensor<string, []>("op_93_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_93_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_108 = const()[name = tensor<string, []>("op_108"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9859456)))];
+            tensor<fp16, [1024]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9861568)))];
+            tensor<fp16, []> var_124_to_fp16 = const()[name = tensor<string, []>("op_124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_124_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_159_weight_0_to_fp16 = const()[name = tensor<string, []>("op_159_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9863680)))];
+            tensor<fp16, [1024]> var_159_bias_0_to_fp16 = const()[name = tensor<string, []>("op_159_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11960896)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_159_cast_fp16 = conv(bias = var_159_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_159_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_159_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11963008)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_157_pad_type_0 = const()[name = tensor<string, []>("op_157_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_157_strides_0 = const()[name = tensor<string, []>("op_157_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_157_pad_0 = const()[name = tensor<string, []>("op_157_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_157_dilations_0 = const()[name = tensor<string, []>("op_157_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_157_groups_0 = const()[name = tensor<string, []>("op_157_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14060224)))];
+            tensor<fp16, [1024]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16157440)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_157_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_157_dilations_0, groups = var_157_groups_0, pad = var_157_pad_0, pad_type = var_157_pad_type_0, strides = var_157_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
+            tensor<int32, [16]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_160_axis_0 = const()[name = tensor<string, []>("op_160_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_15 = split(axis = var_160_axis_0, split_sizes = tile_0, x = var_159_cast_fp16)[name = tensor<string, []>("op_160_cast_fp16")];
+            tensor<int32, [4]> var_177_perm_0 = const()[name = tensor<string, []>("op_177_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_178_axis_0 = const()[name = tensor<string, []>("op_178_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_177_cast_fp16 = transpose(perm = var_177_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_24")];
+            tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_15 = split(axis = var_178_axis_0, split_sizes = tile_1, x = var_177_cast_fp16)[name = tensor<string, []>("op_178_cast_fp16")];
+            tensor<int32, [16]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_195_axis_0 = const()[name = tensor<string, []>("op_195_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_15 = split(axis = var_195_axis_0, split_sizes = tile_2, x = var_157_cast_fp16)[name = tensor<string, []>("op_195_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_178_cast_fp16_0, var_160_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_178_cast_fp16_1, var_160_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_178_cast_fp16_2, var_160_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_178_cast_fp16_3, var_160_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_178_cast_fp16_4, var_160_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_178_cast_fp16_5, var_160_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_178_cast_fp16_6, var_160_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_178_cast_fp16_7, var_160_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_178_cast_fp16_8, var_160_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_178_cast_fp16_9, var_160_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_178_cast_fp16_10, var_160_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_178_cast_fp16_11, var_160_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_178_cast_fp16_12, var_160_cast_fp16_12))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_178_cast_fp16_13, var_160_cast_fp16_13))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_178_cast_fp16_14, var_160_cast_fp16_14))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_178_cast_fp16_15, var_160_cast_fp16_15))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_244_cast_fp16 = softmax(axis = var_108, x = aw_1_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_245_cast_fp16 = softmax(axis = var_108, x = aw_3_cast_fp16)[name = tensor<string, []>("op_245_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_246_cast_fp16 = softmax(axis = var_108, x = aw_5_cast_fp16)[name = tensor<string, []>("op_246_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_247_cast_fp16 = softmax(axis = var_108, x = aw_7_cast_fp16)[name = tensor<string, []>("op_247_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_248_cast_fp16 = softmax(axis = var_108, x = aw_9_cast_fp16)[name = tensor<string, []>("op_248_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_249_cast_fp16 = softmax(axis = var_108, x = aw_11_cast_fp16)[name = tensor<string, []>("op_249_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_250_cast_fp16 = softmax(axis = var_108, x = aw_13_cast_fp16)[name = tensor<string, []>("op_250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_251_cast_fp16 = softmax(axis = var_108, x = aw_15_cast_fp16)[name = tensor<string, []>("op_251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_252_cast_fp16 = softmax(axis = var_108, x = aw_17_cast_fp16)[name = tensor<string, []>("op_252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_253_cast_fp16 = softmax(axis = var_108, x = aw_19_cast_fp16)[name = tensor<string, []>("op_253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_254_cast_fp16 = softmax(axis = var_108, x = aw_21_cast_fp16)[name = tensor<string, []>("op_254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_255_cast_fp16 = softmax(axis = var_108, x = aw_23_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_256_cast_fp16 = softmax(axis = var_108, x = aw_25_cast_fp16)[name = tensor<string, []>("op_256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_257_cast_fp16 = softmax(axis = var_108, x = aw_27_cast_fp16)[name = tensor<string, []>("op_257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_258_cast_fp16 = softmax(axis = var_108, x = aw_29_cast_fp16)[name = tensor<string, []>("op_258_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_259_cast_fp16 = softmax(axis = var_108, x = aw_31_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
+            tensor<string, []> var_261_equation_0 = const()[name = tensor<string, []>("op_261_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_261_cast_fp16 = einsum(equation = var_261_equation_0, values = (var_195_cast_fp16_0, var_244_cast_fp16))[name = tensor<string, []>("op_261_cast_fp16")];
+            tensor<string, []> var_263_equation_0 = const()[name = tensor<string, []>("op_263_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_263_cast_fp16 = einsum(equation = var_263_equation_0, values = (var_195_cast_fp16_1, var_245_cast_fp16))[name = tensor<string, []>("op_263_cast_fp16")];
+            tensor<string, []> var_265_equation_0 = const()[name = tensor<string, []>("op_265_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_265_cast_fp16 = einsum(equation = var_265_equation_0, values = (var_195_cast_fp16_2, var_246_cast_fp16))[name = tensor<string, []>("op_265_cast_fp16")];
+            tensor<string, []> var_267_equation_0 = const()[name = tensor<string, []>("op_267_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_267_cast_fp16 = einsum(equation = var_267_equation_0, values = (var_195_cast_fp16_3, var_247_cast_fp16))[name = tensor<string, []>("op_267_cast_fp16")];
+            tensor<string, []> var_269_equation_0 = const()[name = tensor<string, []>("op_269_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_269_cast_fp16 = einsum(equation = var_269_equation_0, values = (var_195_cast_fp16_4, var_248_cast_fp16))[name = tensor<string, []>("op_269_cast_fp16")];
+            tensor<string, []> var_271_equation_0 = const()[name = tensor<string, []>("op_271_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_271_cast_fp16 = einsum(equation = var_271_equation_0, values = (var_195_cast_fp16_5, var_249_cast_fp16))[name = tensor<string, []>("op_271_cast_fp16")];
+            tensor<string, []> var_273_equation_0 = const()[name = tensor<string, []>("op_273_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_273_cast_fp16 = einsum(equation = var_273_equation_0, values = (var_195_cast_fp16_6, var_250_cast_fp16))[name = tensor<string, []>("op_273_cast_fp16")];
+            tensor<string, []> var_275_equation_0 = const()[name = tensor<string, []>("op_275_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_275_cast_fp16 = einsum(equation = var_275_equation_0, values = (var_195_cast_fp16_7, var_251_cast_fp16))[name = tensor<string, []>("op_275_cast_fp16")];
+            tensor<string, []> var_277_equation_0 = const()[name = tensor<string, []>("op_277_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16 = einsum(equation = var_277_equation_0, values = (var_195_cast_fp16_8, var_252_cast_fp16))[name = tensor<string, []>("op_277_cast_fp16")];
+            tensor<string, []> var_279_equation_0 = const()[name = tensor<string, []>("op_279_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_279_cast_fp16 = einsum(equation = var_279_equation_0, values = (var_195_cast_fp16_9, var_253_cast_fp16))[name = tensor<string, []>("op_279_cast_fp16")];
+            tensor<string, []> var_281_equation_0 = const()[name = tensor<string, []>("op_281_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_281_cast_fp16 = einsum(equation = var_281_equation_0, values = (var_195_cast_fp16_10, var_254_cast_fp16))[name = tensor<string, []>("op_281_cast_fp16")];
+            tensor<string, []> var_283_equation_0 = const()[name = tensor<string, []>("op_283_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_283_cast_fp16 = einsum(equation = var_283_equation_0, values = (var_195_cast_fp16_11, var_255_cast_fp16))[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<string, []> var_285_equation_0 = const()[name = tensor<string, []>("op_285_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_285_cast_fp16 = einsum(equation = var_285_equation_0, values = (var_195_cast_fp16_12, var_256_cast_fp16))[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<string, []> var_287_equation_0 = const()[name = tensor<string, []>("op_287_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_287_cast_fp16 = einsum(equation = var_287_equation_0, values = (var_195_cast_fp16_13, var_257_cast_fp16))[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<string, []> var_289_equation_0 = const()[name = tensor<string, []>("op_289_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_289_cast_fp16 = einsum(equation = var_289_equation_0, values = (var_195_cast_fp16_14, var_258_cast_fp16))[name = tensor<string, []>("op_289_cast_fp16")];
+            tensor<string, []> var_291_equation_0 = const()[name = tensor<string, []>("op_291_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_291_cast_fp16 = einsum(equation = var_291_equation_0, values = (var_195_cast_fp16_15, var_259_cast_fp16))[name = tensor<string, []>("op_291_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_5_cast_fp16 = concat(axis = var_108, interleave = input_5_interleave_0, values = (var_261_cast_fp16, var_263_cast_fp16, var_265_cast_fp16, var_267_cast_fp16, var_269_cast_fp16, var_271_cast_fp16, var_273_cast_fp16, var_275_cast_fp16, var_277_cast_fp16, var_279_cast_fp16, var_281_cast_fp16, var_283_cast_fp16, var_285_cast_fp16, var_287_cast_fp16, var_289_cast_fp16, var_291_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_300_pad_type_0 = const()[name = tensor<string, []>("op_300_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_300_strides_0 = const()[name = tensor<string, []>("op_300_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_300_pad_0 = const()[name = tensor<string, []>("op_300_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_300_dilations_0 = const()[name = tensor<string, []>("op_300_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_300_groups_0 = const()[name = tensor<string, []>("op_300_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16159552)))];
+            tensor<fp16, [1024]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18256768)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_300_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_300_dilations_0, groups = var_300_groups_0, pad = var_300_pad_0, pad_type = var_300_pad_type_0, strides = var_300_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_300_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_300_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18258880)))];
+            tensor<fp16, [1024]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18260992)))];
+            tensor<fp16, []> var_310_to_fp16 = const()[name = tensor<string, []>("op_310_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_310_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18263104)))];
+            tensor<fp16, [4096]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26651776)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_336_pad_type_0 = const()[name = tensor<string, []>("op_336_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_336_strides_0 = const()[name = tensor<string, []>("op_336_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_336_pad_0 = const()[name = tensor<string, []>("op_336_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_336_dilations_0 = const()[name = tensor<string, []>("op_336_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_336_groups_0 = const()[name = tensor<string, []>("op_336_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26660032)))];
+            tensor<fp16, [1024]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35048704)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_336_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_336_dilations_0, groups = var_336_groups_0, pad = var_336_pad_0, pad_type = var_336_pad_type_0, strides = var_336_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_345 = const()[name = tensor<string, []>("op_345"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35050816)))];
+            tensor<fp16, [1024]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35052928)))];
+            tensor<fp16, []> var_361_to_fp16 = const()[name = tensor<string, []>("op_361_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_361_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_396_weight_0_to_fp16 = const()[name = tensor<string, []>("op_396_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35055040)))];
+            tensor<fp16, [1024]> var_396_bias_0_to_fp16 = const()[name = tensor<string, []>("op_396_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37152256)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_396_cast_fp16 = conv(bias = var_396_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_396_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_396_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37154368)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_394_pad_type_0 = const()[name = tensor<string, []>("op_394_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_394_strides_0 = const()[name = tensor<string, []>("op_394_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_394_pad_0 = const()[name = tensor<string, []>("op_394_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_394_dilations_0 = const()[name = tensor<string, []>("op_394_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_394_groups_0 = const()[name = tensor<string, []>("op_394_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39251584)))];
+            tensor<fp16, [1024]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41348800)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_394_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_394_dilations_0, groups = var_394_groups_0, pad = var_394_pad_0, pad_type = var_394_pad_type_0, strides = var_394_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_394_cast_fp16")];
+            tensor<int32, [16]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_397_axis_0 = const()[name = tensor<string, []>("op_397_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_15 = split(axis = var_397_axis_0, split_sizes = tile_3, x = var_396_cast_fp16)[name = tensor<string, []>("op_397_cast_fp16")];
+            tensor<int32, [4]> var_414_perm_0 = const()[name = tensor<string, []>("op_414_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_415_axis_0 = const()[name = tensor<string, []>("op_415_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_414_cast_fp16 = transpose(perm = var_414_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_23")];
+            tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_15 = split(axis = var_415_axis_0, split_sizes = tile_4, x = var_414_cast_fp16)[name = tensor<string, []>("op_415_cast_fp16")];
+            tensor<int32, [16]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_432_axis_0 = const()[name = tensor<string, []>("op_432_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_15 = split(axis = var_432_axis_0, split_sizes = tile_5, x = var_394_cast_fp16)[name = tensor<string, []>("op_432_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_415_cast_fp16_0, var_397_cast_fp16_0))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_415_cast_fp16_1, var_397_cast_fp16_1))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_415_cast_fp16_2, var_397_cast_fp16_2))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_415_cast_fp16_3, var_397_cast_fp16_3))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_415_cast_fp16_4, var_397_cast_fp16_4))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_415_cast_fp16_5, var_397_cast_fp16_5))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_415_cast_fp16_6, var_397_cast_fp16_6))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_415_cast_fp16_7, var_397_cast_fp16_7))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_415_cast_fp16_8, var_397_cast_fp16_8))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_415_cast_fp16_9, var_397_cast_fp16_9))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_415_cast_fp16_10, var_397_cast_fp16_10))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_415_cast_fp16_11, var_397_cast_fp16_11))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_415_cast_fp16_12, var_397_cast_fp16_12))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_415_cast_fp16_13, var_397_cast_fp16_13))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_415_cast_fp16_14, var_397_cast_fp16_14))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_415_cast_fp16_15, var_397_cast_fp16_15))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_481_cast_fp16 = softmax(axis = var_345, x = aw_33_cast_fp16)[name = tensor<string, []>("op_481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_482_cast_fp16 = softmax(axis = var_345, x = aw_35_cast_fp16)[name = tensor<string, []>("op_482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_483_cast_fp16 = softmax(axis = var_345, x = aw_37_cast_fp16)[name = tensor<string, []>("op_483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_484_cast_fp16 = softmax(axis = var_345, x = aw_39_cast_fp16)[name = tensor<string, []>("op_484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_485_cast_fp16 = softmax(axis = var_345, x = aw_41_cast_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_486_cast_fp16 = softmax(axis = var_345, x = aw_43_cast_fp16)[name = tensor<string, []>("op_486_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_487_cast_fp16 = softmax(axis = var_345, x = aw_45_cast_fp16)[name = tensor<string, []>("op_487_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_488_cast_fp16 = softmax(axis = var_345, x = aw_47_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_489_cast_fp16 = softmax(axis = var_345, x = aw_49_cast_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_490_cast_fp16 = softmax(axis = var_345, x = aw_51_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_491_cast_fp16 = softmax(axis = var_345, x = aw_53_cast_fp16)[name = tensor<string, []>("op_491_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_492_cast_fp16 = softmax(axis = var_345, x = aw_55_cast_fp16)[name = tensor<string, []>("op_492_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_493_cast_fp16 = softmax(axis = var_345, x = aw_57_cast_fp16)[name = tensor<string, []>("op_493_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_494_cast_fp16 = softmax(axis = var_345, x = aw_59_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_495_cast_fp16 = softmax(axis = var_345, x = aw_61_cast_fp16)[name = tensor<string, []>("op_495_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_496_cast_fp16 = softmax(axis = var_345, x = aw_63_cast_fp16)[name = tensor<string, []>("op_496_cast_fp16")];
+            tensor<string, []> var_498_equation_0 = const()[name = tensor<string, []>("op_498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_432_cast_fp16_0, var_481_cast_fp16))[name = tensor<string, []>("op_498_cast_fp16")];
+            tensor<string, []> var_500_equation_0 = const()[name = tensor<string, []>("op_500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_432_cast_fp16_1, var_482_cast_fp16))[name = tensor<string, []>("op_500_cast_fp16")];
+            tensor<string, []> var_502_equation_0 = const()[name = tensor<string, []>("op_502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_432_cast_fp16_2, var_483_cast_fp16))[name = tensor<string, []>("op_502_cast_fp16")];
+            tensor<string, []> var_504_equation_0 = const()[name = tensor<string, []>("op_504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_432_cast_fp16_3, var_484_cast_fp16))[name = tensor<string, []>("op_504_cast_fp16")];
+            tensor<string, []> var_506_equation_0 = const()[name = tensor<string, []>("op_506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_432_cast_fp16_4, var_485_cast_fp16))[name = tensor<string, []>("op_506_cast_fp16")];
+            tensor<string, []> var_508_equation_0 = const()[name = tensor<string, []>("op_508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_432_cast_fp16_5, var_486_cast_fp16))[name = tensor<string, []>("op_508_cast_fp16")];
+            tensor<string, []> var_510_equation_0 = const()[name = tensor<string, []>("op_510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_432_cast_fp16_6, var_487_cast_fp16))[name = tensor<string, []>("op_510_cast_fp16")];
+            tensor<string, []> var_512_equation_0 = const()[name = tensor<string, []>("op_512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_432_cast_fp16_7, var_488_cast_fp16))[name = tensor<string, []>("op_512_cast_fp16")];
+            tensor<string, []> var_514_equation_0 = const()[name = tensor<string, []>("op_514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_514_cast_fp16 = einsum(equation = var_514_equation_0, values = (var_432_cast_fp16_8, var_489_cast_fp16))[name = tensor<string, []>("op_514_cast_fp16")];
+            tensor<string, []> var_516_equation_0 = const()[name = tensor<string, []>("op_516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_432_cast_fp16_9, var_490_cast_fp16))[name = tensor<string, []>("op_516_cast_fp16")];
+            tensor<string, []> var_518_equation_0 = const()[name = tensor<string, []>("op_518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_518_cast_fp16 = einsum(equation = var_518_equation_0, values = (var_432_cast_fp16_10, var_491_cast_fp16))[name = tensor<string, []>("op_518_cast_fp16")];
+            tensor<string, []> var_520_equation_0 = const()[name = tensor<string, []>("op_520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_520_cast_fp16 = einsum(equation = var_520_equation_0, values = (var_432_cast_fp16_11, var_492_cast_fp16))[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<string, []> var_522_equation_0 = const()[name = tensor<string, []>("op_522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_522_cast_fp16 = einsum(equation = var_522_equation_0, values = (var_432_cast_fp16_12, var_493_cast_fp16))[name = tensor<string, []>("op_522_cast_fp16")];
+            tensor<string, []> var_524_equation_0 = const()[name = tensor<string, []>("op_524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_524_cast_fp16 = einsum(equation = var_524_equation_0, values = (var_432_cast_fp16_13, var_494_cast_fp16))[name = tensor<string, []>("op_524_cast_fp16")];
+            tensor<string, []> var_526_equation_0 = const()[name = tensor<string, []>("op_526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_526_cast_fp16 = einsum(equation = var_526_equation_0, values = (var_432_cast_fp16_14, var_495_cast_fp16))[name = tensor<string, []>("op_526_cast_fp16")];
+            tensor<string, []> var_528_equation_0 = const()[name = tensor<string, []>("op_528_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_528_cast_fp16 = einsum(equation = var_528_equation_0, values = (var_432_cast_fp16_15, var_496_cast_fp16))[name = tensor<string, []>("op_528_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_15_cast_fp16 = concat(axis = var_345, interleave = input_15_interleave_0, values = (var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16, var_514_cast_fp16, var_516_cast_fp16, var_518_cast_fp16, var_520_cast_fp16, var_522_cast_fp16, var_524_cast_fp16, var_526_cast_fp16, var_528_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_537_pad_type_0 = const()[name = tensor<string, []>("op_537_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_537_strides_0 = const()[name = tensor<string, []>("op_537_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_537_pad_0 = const()[name = tensor<string, []>("op_537_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_537_dilations_0 = const()[name = tensor<string, []>("op_537_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_537_groups_0 = const()[name = tensor<string, []>("op_537_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41350912)))];
+            tensor<fp16, [1024]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43448128)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_537_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_537_dilations_0, groups = var_537_groups_0, pad = var_537_pad_0, pad_type = var_537_pad_type_0, strides = var_537_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_537_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43450240)))];
+            tensor<fp16, [1024]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43452352)))];
+            tensor<fp16, []> var_547_to_fp16 = const()[name = tensor<string, []>("op_547_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_547_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43454464)))];
+            tensor<fp16, [4096]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51843136)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_573_pad_type_0 = const()[name = tensor<string, []>("op_573_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_573_strides_0 = const()[name = tensor<string, []>("op_573_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_573_pad_0 = const()[name = tensor<string, []>("op_573_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_573_dilations_0 = const()[name = tensor<string, []>("op_573_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_573_groups_0 = const()[name = tensor<string, []>("op_573_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51851392)))];
+            tensor<fp16, [1024]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60240064)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_573_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_573_dilations_0, groups = var_573_groups_0, pad = var_573_pad_0, pad_type = var_573_pad_type_0, strides = var_573_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_573_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_582 = const()[name = tensor<string, []>("op_582"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60242176)))];
+            tensor<fp16, [1024]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60244288)))];
+            tensor<fp16, []> var_598_to_fp16 = const()[name = tensor<string, []>("op_598_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_598_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_633_weight_0_to_fp16 = const()[name = tensor<string, []>("op_633_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60246400)))];
+            tensor<fp16, [1024]> var_633_bias_0_to_fp16 = const()[name = tensor<string, []>("op_633_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62343616)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_633_cast_fp16 = conv(bias = var_633_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_633_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_633_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62345728)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_631_pad_type_0 = const()[name = tensor<string, []>("op_631_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_631_strides_0 = const()[name = tensor<string, []>("op_631_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_631_pad_0 = const()[name = tensor<string, []>("op_631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_631_dilations_0 = const()[name = tensor<string, []>("op_631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_631_groups_0 = const()[name = tensor<string, []>("op_631_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64442944)))];
+            tensor<fp16, [1024]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66540160)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_631_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_631_cast_fp16")];
+            tensor<int32, [16]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_634_axis_0 = const()[name = tensor<string, []>("op_634_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_15 = split(axis = var_634_axis_0, split_sizes = tile_6, x = var_633_cast_fp16)[name = tensor<string, []>("op_634_cast_fp16")];
+            tensor<int32, [4]> var_651_perm_0 = const()[name = tensor<string, []>("op_651_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_652_axis_0 = const()[name = tensor<string, []>("op_652_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_651_cast_fp16 = transpose(perm = var_651_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_22")];
+            tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_15 = split(axis = var_652_axis_0, split_sizes = tile_7, x = var_651_cast_fp16)[name = tensor<string, []>("op_652_cast_fp16")];
+            tensor<int32, [16]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_669_axis_0 = const()[name = tensor<string, []>("op_669_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_15 = split(axis = var_669_axis_0, split_sizes = tile_8, x = var_631_cast_fp16)[name = tensor<string, []>("op_669_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_652_cast_fp16_0, var_634_cast_fp16_0))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_652_cast_fp16_1, var_634_cast_fp16_1))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_652_cast_fp16_2, var_634_cast_fp16_2))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_652_cast_fp16_3, var_634_cast_fp16_3))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_652_cast_fp16_4, var_634_cast_fp16_4))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_652_cast_fp16_5, var_634_cast_fp16_5))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_652_cast_fp16_6, var_634_cast_fp16_6))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_652_cast_fp16_7, var_634_cast_fp16_7))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_652_cast_fp16_8, var_634_cast_fp16_8))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_652_cast_fp16_9, var_634_cast_fp16_9))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_652_cast_fp16_10, var_634_cast_fp16_10))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_652_cast_fp16_11, var_634_cast_fp16_11))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_652_cast_fp16_12, var_634_cast_fp16_12))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_652_cast_fp16_13, var_634_cast_fp16_13))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_652_cast_fp16_14, var_634_cast_fp16_14))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_652_cast_fp16_15, var_634_cast_fp16_15))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_718_cast_fp16 = softmax(axis = var_582, x = aw_65_cast_fp16)[name = tensor<string, []>("op_718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_719_cast_fp16 = softmax(axis = var_582, x = aw_67_cast_fp16)[name = tensor<string, []>("op_719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_720_cast_fp16 = softmax(axis = var_582, x = aw_69_cast_fp16)[name = tensor<string, []>("op_720_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_721_cast_fp16 = softmax(axis = var_582, x = aw_71_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_722_cast_fp16 = softmax(axis = var_582, x = aw_73_cast_fp16)[name = tensor<string, []>("op_722_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_723_cast_fp16 = softmax(axis = var_582, x = aw_75_cast_fp16)[name = tensor<string, []>("op_723_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_724_cast_fp16 = softmax(axis = var_582, x = aw_77_cast_fp16)[name = tensor<string, []>("op_724_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_725_cast_fp16 = softmax(axis = var_582, x = aw_79_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_726_cast_fp16 = softmax(axis = var_582, x = aw_81_cast_fp16)[name = tensor<string, []>("op_726_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_727_cast_fp16 = softmax(axis = var_582, x = aw_83_cast_fp16)[name = tensor<string, []>("op_727_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_728_cast_fp16 = softmax(axis = var_582, x = aw_85_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_729_cast_fp16 = softmax(axis = var_582, x = aw_87_cast_fp16)[name = tensor<string, []>("op_729_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_730_cast_fp16 = softmax(axis = var_582, x = aw_89_cast_fp16)[name = tensor<string, []>("op_730_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_731_cast_fp16 = softmax(axis = var_582, x = aw_91_cast_fp16)[name = tensor<string, []>("op_731_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_732_cast_fp16 = softmax(axis = var_582, x = aw_93_cast_fp16)[name = tensor<string, []>("op_732_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_733_cast_fp16 = softmax(axis = var_582, x = aw_95_cast_fp16)[name = tensor<string, []>("op_733_cast_fp16")];
+            tensor<string, []> var_735_equation_0 = const()[name = tensor<string, []>("op_735_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_735_cast_fp16 = einsum(equation = var_735_equation_0, values = (var_669_cast_fp16_0, var_718_cast_fp16))[name = tensor<string, []>("op_735_cast_fp16")];
+            tensor<string, []> var_737_equation_0 = const()[name = tensor<string, []>("op_737_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_737_cast_fp16 = einsum(equation = var_737_equation_0, values = (var_669_cast_fp16_1, var_719_cast_fp16))[name = tensor<string, []>("op_737_cast_fp16")];
+            tensor<string, []> var_739_equation_0 = const()[name = tensor<string, []>("op_739_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_739_cast_fp16 = einsum(equation = var_739_equation_0, values = (var_669_cast_fp16_2, var_720_cast_fp16))[name = tensor<string, []>("op_739_cast_fp16")];
+            tensor<string, []> var_741_equation_0 = const()[name = tensor<string, []>("op_741_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_741_cast_fp16 = einsum(equation = var_741_equation_0, values = (var_669_cast_fp16_3, var_721_cast_fp16))[name = tensor<string, []>("op_741_cast_fp16")];
+            tensor<string, []> var_743_equation_0 = const()[name = tensor<string, []>("op_743_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_743_cast_fp16 = einsum(equation = var_743_equation_0, values = (var_669_cast_fp16_4, var_722_cast_fp16))[name = tensor<string, []>("op_743_cast_fp16")];
+            tensor<string, []> var_745_equation_0 = const()[name = tensor<string, []>("op_745_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_745_cast_fp16 = einsum(equation = var_745_equation_0, values = (var_669_cast_fp16_5, var_723_cast_fp16))[name = tensor<string, []>("op_745_cast_fp16")];
+            tensor<string, []> var_747_equation_0 = const()[name = tensor<string, []>("op_747_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_747_cast_fp16 = einsum(equation = var_747_equation_0, values = (var_669_cast_fp16_6, var_724_cast_fp16))[name = tensor<string, []>("op_747_cast_fp16")];
+            tensor<string, []> var_749_equation_0 = const()[name = tensor<string, []>("op_749_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_749_cast_fp16 = einsum(equation = var_749_equation_0, values = (var_669_cast_fp16_7, var_725_cast_fp16))[name = tensor<string, []>("op_749_cast_fp16")];
+            tensor<string, []> var_751_equation_0 = const()[name = tensor<string, []>("op_751_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16 = einsum(equation = var_751_equation_0, values = (var_669_cast_fp16_8, var_726_cast_fp16))[name = tensor<string, []>("op_751_cast_fp16")];
+            tensor<string, []> var_753_equation_0 = const()[name = tensor<string, []>("op_753_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_753_cast_fp16 = einsum(equation = var_753_equation_0, values = (var_669_cast_fp16_9, var_727_cast_fp16))[name = tensor<string, []>("op_753_cast_fp16")];
+            tensor<string, []> var_755_equation_0 = const()[name = tensor<string, []>("op_755_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_755_cast_fp16 = einsum(equation = var_755_equation_0, values = (var_669_cast_fp16_10, var_728_cast_fp16))[name = tensor<string, []>("op_755_cast_fp16")];
+            tensor<string, []> var_757_equation_0 = const()[name = tensor<string, []>("op_757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16 = einsum(equation = var_757_equation_0, values = (var_669_cast_fp16_11, var_729_cast_fp16))[name = tensor<string, []>("op_757_cast_fp16")];
+            tensor<string, []> var_759_equation_0 = const()[name = tensor<string, []>("op_759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_759_cast_fp16 = einsum(equation = var_759_equation_0, values = (var_669_cast_fp16_12, var_730_cast_fp16))[name = tensor<string, []>("op_759_cast_fp16")];
+            tensor<string, []> var_761_equation_0 = const()[name = tensor<string, []>("op_761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_761_cast_fp16 = einsum(equation = var_761_equation_0, values = (var_669_cast_fp16_13, var_731_cast_fp16))[name = tensor<string, []>("op_761_cast_fp16")];
+            tensor<string, []> var_763_equation_0 = const()[name = tensor<string, []>("op_763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_763_cast_fp16 = einsum(equation = var_763_equation_0, values = (var_669_cast_fp16_14, var_732_cast_fp16))[name = tensor<string, []>("op_763_cast_fp16")];
+            tensor<string, []> var_765_equation_0 = const()[name = tensor<string, []>("op_765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_765_cast_fp16 = einsum(equation = var_765_equation_0, values = (var_669_cast_fp16_15, var_733_cast_fp16))[name = tensor<string, []>("op_765_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_25_cast_fp16 = concat(axis = var_582, interleave = input_25_interleave_0, values = (var_735_cast_fp16, var_737_cast_fp16, var_739_cast_fp16, var_741_cast_fp16, var_743_cast_fp16, var_745_cast_fp16, var_747_cast_fp16, var_749_cast_fp16, var_751_cast_fp16, var_753_cast_fp16, var_755_cast_fp16, var_757_cast_fp16, var_759_cast_fp16, var_761_cast_fp16, var_763_cast_fp16, var_765_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_774_pad_type_0 = const()[name = tensor<string, []>("op_774_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_774_strides_0 = const()[name = tensor<string, []>("op_774_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_774_pad_0 = const()[name = tensor<string, []>("op_774_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_774_dilations_0 = const()[name = tensor<string, []>("op_774_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_774_groups_0 = const()[name = tensor<string, []>("op_774_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66542272)))];
+            tensor<fp16, [1024]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68639488)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_774_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_774_dilations_0, groups = var_774_groups_0, pad = var_774_pad_0, pad_type = var_774_pad_type_0, strides = var_774_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_774_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_774_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68641600)))];
+            tensor<fp16, [1024]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68643712)))];
+            tensor<fp16, []> var_784_to_fp16 = const()[name = tensor<string, []>("op_784_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_784_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68645824)))];
+            tensor<fp16, [4096]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77034496)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_810_pad_type_0 = const()[name = tensor<string, []>("op_810_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_810_strides_0 = const()[name = tensor<string, []>("op_810_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_810_pad_0 = const()[name = tensor<string, []>("op_810_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_810_dilations_0 = const()[name = tensor<string, []>("op_810_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_810_groups_0 = const()[name = tensor<string, []>("op_810_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77042752)))];
+            tensor<fp16, [1024]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85431424)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_810_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_810_dilations_0, groups = var_810_groups_0, pad = var_810_pad_0, pad_type = var_810_pad_type_0, strides = var_810_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_810_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_810_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_819 = const()[name = tensor<string, []>("op_819"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85433536)))];
+            tensor<fp16, [1024]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85435648)))];
+            tensor<fp16, []> var_835_to_fp16 = const()[name = tensor<string, []>("op_835_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_835_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_870_weight_0_to_fp16 = const()[name = tensor<string, []>("op_870_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85437760)))];
+            tensor<fp16, [1024]> var_870_bias_0_to_fp16 = const()[name = tensor<string, []>("op_870_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87534976)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_870_cast_fp16 = conv(bias = var_870_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_870_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_870_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87537088)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_868_pad_type_0 = const()[name = tensor<string, []>("op_868_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_868_strides_0 = const()[name = tensor<string, []>("op_868_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_868_pad_0 = const()[name = tensor<string, []>("op_868_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_868_dilations_0 = const()[name = tensor<string, []>("op_868_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_868_groups_0 = const()[name = tensor<string, []>("op_868_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89634304)))];
+            tensor<fp16, [1024]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91731520)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_868_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_868_dilations_0, groups = var_868_groups_0, pad = var_868_pad_0, pad_type = var_868_pad_type_0, strides = var_868_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_868_cast_fp16")];
+            tensor<int32, [16]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_871_axis_0 = const()[name = tensor<string, []>("op_871_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_15 = split(axis = var_871_axis_0, split_sizes = tile_9, x = var_870_cast_fp16)[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<int32, [4]> var_888_perm_0 = const()[name = tensor<string, []>("op_888_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_889_axis_0 = const()[name = tensor<string, []>("op_889_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_888_cast_fp16 = transpose(perm = var_888_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_21")];
+            tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_15 = split(axis = var_889_axis_0, split_sizes = tile_10, x = var_888_cast_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
+            tensor<int32, [16]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_906_axis_0 = const()[name = tensor<string, []>("op_906_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_15 = split(axis = var_906_axis_0, split_sizes = tile_11, x = var_868_cast_fp16)[name = tensor<string, []>("op_906_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_889_cast_fp16_0, var_871_cast_fp16_0))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_889_cast_fp16_1, var_871_cast_fp16_1))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_889_cast_fp16_2, var_871_cast_fp16_2))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_889_cast_fp16_3, var_871_cast_fp16_3))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_889_cast_fp16_4, var_871_cast_fp16_4))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_889_cast_fp16_5, var_871_cast_fp16_5))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_889_cast_fp16_6, var_871_cast_fp16_6))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_889_cast_fp16_7, var_871_cast_fp16_7))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_889_cast_fp16_8, var_871_cast_fp16_8))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_889_cast_fp16_9, var_871_cast_fp16_9))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_889_cast_fp16_10, var_871_cast_fp16_10))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_889_cast_fp16_11, var_871_cast_fp16_11))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_889_cast_fp16_12, var_871_cast_fp16_12))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_889_cast_fp16_13, var_871_cast_fp16_13))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_889_cast_fp16_14, var_871_cast_fp16_14))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_889_cast_fp16_15, var_871_cast_fp16_15))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_955_cast_fp16 = softmax(axis = var_819, x = aw_97_cast_fp16)[name = tensor<string, []>("op_955_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_956_cast_fp16 = softmax(axis = var_819, x = aw_99_cast_fp16)[name = tensor<string, []>("op_956_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_957_cast_fp16 = softmax(axis = var_819, x = aw_101_cast_fp16)[name = tensor<string, []>("op_957_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_958_cast_fp16 = softmax(axis = var_819, x = aw_103_cast_fp16)[name = tensor<string, []>("op_958_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_959_cast_fp16 = softmax(axis = var_819, x = aw_105_cast_fp16)[name = tensor<string, []>("op_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_960_cast_fp16 = softmax(axis = var_819, x = aw_107_cast_fp16)[name = tensor<string, []>("op_960_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_961_cast_fp16 = softmax(axis = var_819, x = aw_109_cast_fp16)[name = tensor<string, []>("op_961_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_962_cast_fp16 = softmax(axis = var_819, x = aw_111_cast_fp16)[name = tensor<string, []>("op_962_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_963_cast_fp16 = softmax(axis = var_819, x = aw_113_cast_fp16)[name = tensor<string, []>("op_963_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_964_cast_fp16 = softmax(axis = var_819, x = aw_115_cast_fp16)[name = tensor<string, []>("op_964_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_965_cast_fp16 = softmax(axis = var_819, x = aw_117_cast_fp16)[name = tensor<string, []>("op_965_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_966_cast_fp16 = softmax(axis = var_819, x = aw_119_cast_fp16)[name = tensor<string, []>("op_966_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_967_cast_fp16 = softmax(axis = var_819, x = aw_121_cast_fp16)[name = tensor<string, []>("op_967_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_968_cast_fp16 = softmax(axis = var_819, x = aw_123_cast_fp16)[name = tensor<string, []>("op_968_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_969_cast_fp16 = softmax(axis = var_819, x = aw_125_cast_fp16)[name = tensor<string, []>("op_969_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_970_cast_fp16 = softmax(axis = var_819, x = aw_127_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<string, []> var_972_equation_0 = const()[name = tensor<string, []>("op_972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_972_cast_fp16 = einsum(equation = var_972_equation_0, values = (var_906_cast_fp16_0, var_955_cast_fp16))[name = tensor<string, []>("op_972_cast_fp16")];
+            tensor<string, []> var_974_equation_0 = const()[name = tensor<string, []>("op_974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_974_cast_fp16 = einsum(equation = var_974_equation_0, values = (var_906_cast_fp16_1, var_956_cast_fp16))[name = tensor<string, []>("op_974_cast_fp16")];
+            tensor<string, []> var_976_equation_0 = const()[name = tensor<string, []>("op_976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_976_cast_fp16 = einsum(equation = var_976_equation_0, values = (var_906_cast_fp16_2, var_957_cast_fp16))[name = tensor<string, []>("op_976_cast_fp16")];
+            tensor<string, []> var_978_equation_0 = const()[name = tensor<string, []>("op_978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_978_cast_fp16 = einsum(equation = var_978_equation_0, values = (var_906_cast_fp16_3, var_958_cast_fp16))[name = tensor<string, []>("op_978_cast_fp16")];
+            tensor<string, []> var_980_equation_0 = const()[name = tensor<string, []>("op_980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_980_cast_fp16 = einsum(equation = var_980_equation_0, values = (var_906_cast_fp16_4, var_959_cast_fp16))[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<string, []> var_982_equation_0 = const()[name = tensor<string, []>("op_982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_906_cast_fp16_5, var_960_cast_fp16))[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> var_984_equation_0 = const()[name = tensor<string, []>("op_984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_906_cast_fp16_6, var_961_cast_fp16))[name = tensor<string, []>("op_984_cast_fp16")];
+            tensor<string, []> var_986_equation_0 = const()[name = tensor<string, []>("op_986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_906_cast_fp16_7, var_962_cast_fp16))[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<string, []> var_988_equation_0 = const()[name = tensor<string, []>("op_988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_906_cast_fp16_8, var_963_cast_fp16))[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<string, []> var_990_equation_0 = const()[name = tensor<string, []>("op_990_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_906_cast_fp16_9, var_964_cast_fp16))[name = tensor<string, []>("op_990_cast_fp16")];
+            tensor<string, []> var_992_equation_0 = const()[name = tensor<string, []>("op_992_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_906_cast_fp16_10, var_965_cast_fp16))[name = tensor<string, []>("op_992_cast_fp16")];
+            tensor<string, []> var_994_equation_0 = const()[name = tensor<string, []>("op_994_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_906_cast_fp16_11, var_966_cast_fp16))[name = tensor<string, []>("op_994_cast_fp16")];
+            tensor<string, []> var_996_equation_0 = const()[name = tensor<string, []>("op_996_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_906_cast_fp16_12, var_967_cast_fp16))[name = tensor<string, []>("op_996_cast_fp16")];
+            tensor<string, []> var_998_equation_0 = const()[name = tensor<string, []>("op_998_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_906_cast_fp16_13, var_968_cast_fp16))[name = tensor<string, []>("op_998_cast_fp16")];
+            tensor<string, []> var_1000_equation_0 = const()[name = tensor<string, []>("op_1000_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_906_cast_fp16_14, var_969_cast_fp16))[name = tensor<string, []>("op_1000_cast_fp16")];
+            tensor<string, []> var_1002_equation_0 = const()[name = tensor<string, []>("op_1002_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_906_cast_fp16_15, var_970_cast_fp16))[name = tensor<string, []>("op_1002_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_35_cast_fp16 = concat(axis = var_819, interleave = input_35_interleave_0, values = (var_972_cast_fp16, var_974_cast_fp16, var_976_cast_fp16, var_978_cast_fp16, var_980_cast_fp16, var_982_cast_fp16, var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16, var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_1011_pad_type_0 = const()[name = tensor<string, []>("op_1011_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1011_strides_0 = const()[name = tensor<string, []>("op_1011_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1011_pad_0 = const()[name = tensor<string, []>("op_1011_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1011_dilations_0 = const()[name = tensor<string, []>("op_1011_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1011_groups_0 = const()[name = tensor<string, []>("op_1011_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91733632)))];
+            tensor<fp16, [1024]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93830848)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1011_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_1011_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_1011_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93832960)))];
+            tensor<fp16, [1024]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93835072)))];
+            tensor<fp16, []> var_1021_to_fp16 = const()[name = tensor<string, []>("op_1021_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_1021_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93837184)))];
+            tensor<fp16, [4096]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102225856)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_1047_pad_type_0 = const()[name = tensor<string, []>("op_1047_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1047_strides_0 = const()[name = tensor<string, []>("op_1047_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1047_pad_0 = const()[name = tensor<string, []>("op_1047_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1047_dilations_0 = const()[name = tensor<string, []>("op_1047_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1047_groups_0 = const()[name = tensor<string, []>("op_1047_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102234112)))];
+            tensor<fp16, [1024]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110622784)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1047_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_1047_dilations_0, groups = var_1047_groups_0, pad = var_1047_pad_0, pad_type = var_1047_pad_type_0, strides = var_1047_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_1047_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_1047_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_1056 = const()[name = tensor<string, []>("op_1056"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110624896)))];
+            tensor<fp16, [1024]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110627008)))];
+            tensor<fp16, []> var_1072_to_fp16 = const()[name = tensor<string, []>("op_1072_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_1072_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1107_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1107_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110629120)))];
+            tensor<fp16, [1024]> var_1107_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1107_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112726336)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1107_cast_fp16 = conv(bias = var_1107_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_1107_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1107_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112728448)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_1105_pad_type_0 = const()[name = tensor<string, []>("op_1105_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1105_strides_0 = const()[name = tensor<string, []>("op_1105_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1105_pad_0 = const()[name = tensor<string, []>("op_1105_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1105_dilations_0 = const()[name = tensor<string, []>("op_1105_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1105_groups_0 = const()[name = tensor<string, []>("op_1105_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114825664)))];
+            tensor<fp16, [1024]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116922880)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1105_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_1105_dilations_0, groups = var_1105_groups_0, pad = var_1105_pad_0, pad_type = var_1105_pad_type_0, strides = var_1105_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<int32, [16]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1108_axis_0 = const()[name = tensor<string, []>("op_1108_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_15 = split(axis = var_1108_axis_0, split_sizes = tile_12, x = var_1107_cast_fp16)[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<int32, [4]> var_1125_perm_0 = const()[name = tensor<string, []>("op_1125_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1126_axis_0 = const()[name = tensor<string, []>("op_1126_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1125_cast_fp16 = transpose(perm = var_1125_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_20")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_15 = split(axis = var_1126_axis_0, split_sizes = tile_13, x = var_1125_cast_fp16)[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<int32, [16]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1143_axis_0 = const()[name = tensor<string, []>("op_1143_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_15 = split(axis = var_1143_axis_0, split_sizes = tile_14, x = var_1105_cast_fp16)[name = tensor<string, []>("op_1143_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1126_cast_fp16_0, var_1108_cast_fp16_0))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1126_cast_fp16_1, var_1108_cast_fp16_1))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1126_cast_fp16_2, var_1108_cast_fp16_2))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1126_cast_fp16_3, var_1108_cast_fp16_3))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1126_cast_fp16_4, var_1108_cast_fp16_4))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1126_cast_fp16_5, var_1108_cast_fp16_5))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1126_cast_fp16_6, var_1108_cast_fp16_6))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1126_cast_fp16_7, var_1108_cast_fp16_7))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1126_cast_fp16_8, var_1108_cast_fp16_8))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1126_cast_fp16_9, var_1108_cast_fp16_9))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1126_cast_fp16_10, var_1108_cast_fp16_10))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1126_cast_fp16_11, var_1108_cast_fp16_11))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1126_cast_fp16_12, var_1108_cast_fp16_12))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1126_cast_fp16_13, var_1108_cast_fp16_13))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1126_cast_fp16_14, var_1108_cast_fp16_14))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1126_cast_fp16_15, var_1108_cast_fp16_15))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1192_cast_fp16 = softmax(axis = var_1056, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1192_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1193_cast_fp16 = softmax(axis = var_1056, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1193_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1194_cast_fp16 = softmax(axis = var_1056, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1194_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1195_cast_fp16 = softmax(axis = var_1056, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1195_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1196_cast_fp16 = softmax(axis = var_1056, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1196_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1197_cast_fp16 = softmax(axis = var_1056, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1197_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1198_cast_fp16 = softmax(axis = var_1056, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1198_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1199_cast_fp16 = softmax(axis = var_1056, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1200_cast_fp16 = softmax(axis = var_1056, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1201_cast_fp16 = softmax(axis = var_1056, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1202_cast_fp16 = softmax(axis = var_1056, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1203_cast_fp16 = softmax(axis = var_1056, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1204_cast_fp16 = softmax(axis = var_1056, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1205_cast_fp16 = softmax(axis = var_1056, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1206_cast_fp16 = softmax(axis = var_1056, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1207_cast_fp16 = softmax(axis = var_1056, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1207_cast_fp16")];
+            tensor<string, []> var_1209_equation_0 = const()[name = tensor<string, []>("op_1209_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1209_cast_fp16 = einsum(equation = var_1209_equation_0, values = (var_1143_cast_fp16_0, var_1192_cast_fp16))[name = tensor<string, []>("op_1209_cast_fp16")];
+            tensor<string, []> var_1211_equation_0 = const()[name = tensor<string, []>("op_1211_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1211_cast_fp16 = einsum(equation = var_1211_equation_0, values = (var_1143_cast_fp16_1, var_1193_cast_fp16))[name = tensor<string, []>("op_1211_cast_fp16")];
+            tensor<string, []> var_1213_equation_0 = const()[name = tensor<string, []>("op_1213_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1213_cast_fp16 = einsum(equation = var_1213_equation_0, values = (var_1143_cast_fp16_2, var_1194_cast_fp16))[name = tensor<string, []>("op_1213_cast_fp16")];
+            tensor<string, []> var_1215_equation_0 = const()[name = tensor<string, []>("op_1215_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1215_cast_fp16 = einsum(equation = var_1215_equation_0, values = (var_1143_cast_fp16_3, var_1195_cast_fp16))[name = tensor<string, []>("op_1215_cast_fp16")];
+            tensor<string, []> var_1217_equation_0 = const()[name = tensor<string, []>("op_1217_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1217_cast_fp16 = einsum(equation = var_1217_equation_0, values = (var_1143_cast_fp16_4, var_1196_cast_fp16))[name = tensor<string, []>("op_1217_cast_fp16")];
+            tensor<string, []> var_1219_equation_0 = const()[name = tensor<string, []>("op_1219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1219_cast_fp16 = einsum(equation = var_1219_equation_0, values = (var_1143_cast_fp16_5, var_1197_cast_fp16))[name = tensor<string, []>("op_1219_cast_fp16")];
+            tensor<string, []> var_1221_equation_0 = const()[name = tensor<string, []>("op_1221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1221_cast_fp16 = einsum(equation = var_1221_equation_0, values = (var_1143_cast_fp16_6, var_1198_cast_fp16))[name = tensor<string, []>("op_1221_cast_fp16")];
+            tensor<string, []> var_1223_equation_0 = const()[name = tensor<string, []>("op_1223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1223_cast_fp16 = einsum(equation = var_1223_equation_0, values = (var_1143_cast_fp16_7, var_1199_cast_fp16))[name = tensor<string, []>("op_1223_cast_fp16")];
+            tensor<string, []> var_1225_equation_0 = const()[name = tensor<string, []>("op_1225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1225_cast_fp16 = einsum(equation = var_1225_equation_0, values = (var_1143_cast_fp16_8, var_1200_cast_fp16))[name = tensor<string, []>("op_1225_cast_fp16")];
+            tensor<string, []> var_1227_equation_0 = const()[name = tensor<string, []>("op_1227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1227_cast_fp16 = einsum(equation = var_1227_equation_0, values = (var_1143_cast_fp16_9, var_1201_cast_fp16))[name = tensor<string, []>("op_1227_cast_fp16")];
+            tensor<string, []> var_1229_equation_0 = const()[name = tensor<string, []>("op_1229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1229_cast_fp16 = einsum(equation = var_1229_equation_0, values = (var_1143_cast_fp16_10, var_1202_cast_fp16))[name = tensor<string, []>("op_1229_cast_fp16")];
+            tensor<string, []> var_1231_equation_0 = const()[name = tensor<string, []>("op_1231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1231_cast_fp16 = einsum(equation = var_1231_equation_0, values = (var_1143_cast_fp16_11, var_1203_cast_fp16))[name = tensor<string, []>("op_1231_cast_fp16")];
+            tensor<string, []> var_1233_equation_0 = const()[name = tensor<string, []>("op_1233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1233_cast_fp16 = einsum(equation = var_1233_equation_0, values = (var_1143_cast_fp16_12, var_1204_cast_fp16))[name = tensor<string, []>("op_1233_cast_fp16")];
+            tensor<string, []> var_1235_equation_0 = const()[name = tensor<string, []>("op_1235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1235_cast_fp16 = einsum(equation = var_1235_equation_0, values = (var_1143_cast_fp16_13, var_1205_cast_fp16))[name = tensor<string, []>("op_1235_cast_fp16")];
+            tensor<string, []> var_1237_equation_0 = const()[name = tensor<string, []>("op_1237_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1237_cast_fp16 = einsum(equation = var_1237_equation_0, values = (var_1143_cast_fp16_14, var_1206_cast_fp16))[name = tensor<string, []>("op_1237_cast_fp16")];
+            tensor<string, []> var_1239_equation_0 = const()[name = tensor<string, []>("op_1239_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1239_cast_fp16 = einsum(equation = var_1239_equation_0, values = (var_1143_cast_fp16_15, var_1207_cast_fp16))[name = tensor<string, []>("op_1239_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_45_cast_fp16 = concat(axis = var_1056, interleave = input_45_interleave_0, values = (var_1209_cast_fp16, var_1211_cast_fp16, var_1213_cast_fp16, var_1215_cast_fp16, var_1217_cast_fp16, var_1219_cast_fp16, var_1221_cast_fp16, var_1223_cast_fp16, var_1225_cast_fp16, var_1227_cast_fp16, var_1229_cast_fp16, var_1231_cast_fp16, var_1233_cast_fp16, var_1235_cast_fp16, var_1237_cast_fp16, var_1239_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1248_pad_type_0 = const()[name = tensor<string, []>("op_1248_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1248_strides_0 = const()[name = tensor<string, []>("op_1248_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1248_pad_0 = const()[name = tensor<string, []>("op_1248_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1248_dilations_0 = const()[name = tensor<string, []>("op_1248_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1248_groups_0 = const()[name = tensor<string, []>("op_1248_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116924992)))];
+            tensor<fp16, [1024]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119022208)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1248_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1248_dilations_0, groups = var_1248_groups_0, pad = var_1248_pad_0, pad_type = var_1248_pad_type_0, strides = var_1248_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1248_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1248_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119024320)))];
+            tensor<fp16, [1024]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119026432)))];
+            tensor<fp16, []> var_1258_to_fp16 = const()[name = tensor<string, []>("op_1258_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1258_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119028544)))];
+            tensor<fp16, [4096]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127417216)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1284_pad_type_0 = const()[name = tensor<string, []>("op_1284_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1284_strides_0 = const()[name = tensor<string, []>("op_1284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1284_pad_0 = const()[name = tensor<string, []>("op_1284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1284_dilations_0 = const()[name = tensor<string, []>("op_1284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1284_groups_0 = const()[name = tensor<string, []>("op_1284_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127425472)))];
+            tensor<fp16, [1024]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135814144)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1284_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1284_dilations_0, groups = var_1284_groups_0, pad = var_1284_pad_0, pad_type = var_1284_pad_type_0, strides = var_1284_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1284_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1284_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1293 = const()[name = tensor<string, []>("op_1293"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135816256)))];
+            tensor<fp16, [1024]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135818368)))];
+            tensor<fp16, []> var_1309_to_fp16 = const()[name = tensor<string, []>("op_1309_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1309_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1344_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1344_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135820480)))];
+            tensor<fp16, [1024]> var_1344_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1344_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137917696)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1344_cast_fp16 = conv(bias = var_1344_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1344_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1344_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137919808)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1342_pad_type_0 = const()[name = tensor<string, []>("op_1342_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1342_strides_0 = const()[name = tensor<string, []>("op_1342_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1342_pad_0 = const()[name = tensor<string, []>("op_1342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1342_dilations_0 = const()[name = tensor<string, []>("op_1342_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1342_groups_0 = const()[name = tensor<string, []>("op_1342_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(140017024)))];
+            tensor<fp16, [1024]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142114240)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1342_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1342_dilations_0, groups = var_1342_groups_0, pad = var_1342_pad_0, pad_type = var_1342_pad_type_0, strides = var_1342_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
+            tensor<int32, [16]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1345_axis_0 = const()[name = tensor<string, []>("op_1345_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_15 = split(axis = var_1345_axis_0, split_sizes = tile_15, x = var_1344_cast_fp16)[name = tensor<string, []>("op_1345_cast_fp16")];
+            tensor<int32, [4]> var_1362_perm_0 = const()[name = tensor<string, []>("op_1362_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1363_axis_0 = const()[name = tensor<string, []>("op_1363_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1362_cast_fp16 = transpose(perm = var_1362_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_19")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_15 = split(axis = var_1363_axis_0, split_sizes = tile_16, x = var_1362_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [16]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1380_axis_0 = const()[name = tensor<string, []>("op_1380_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_15 = split(axis = var_1380_axis_0, split_sizes = tile_17, x = var_1342_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1363_cast_fp16_0, var_1345_cast_fp16_0))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1363_cast_fp16_1, var_1345_cast_fp16_1))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1363_cast_fp16_2, var_1345_cast_fp16_2))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1363_cast_fp16_3, var_1345_cast_fp16_3))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1363_cast_fp16_4, var_1345_cast_fp16_4))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1363_cast_fp16_5, var_1345_cast_fp16_5))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1363_cast_fp16_6, var_1345_cast_fp16_6))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1363_cast_fp16_7, var_1345_cast_fp16_7))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1363_cast_fp16_8, var_1345_cast_fp16_8))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1363_cast_fp16_9, var_1345_cast_fp16_9))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1363_cast_fp16_10, var_1345_cast_fp16_10))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1363_cast_fp16_11, var_1345_cast_fp16_11))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1363_cast_fp16_12, var_1345_cast_fp16_12))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1363_cast_fp16_13, var_1345_cast_fp16_13))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1363_cast_fp16_14, var_1345_cast_fp16_14))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1363_cast_fp16_15, var_1345_cast_fp16_15))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1429_cast_fp16 = softmax(axis = var_1293, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1429_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1430_cast_fp16 = softmax(axis = var_1293, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1430_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1431_cast_fp16 = softmax(axis = var_1293, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1431_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1432_cast_fp16 = softmax(axis = var_1293, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1433_cast_fp16 = softmax(axis = var_1293, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1434_cast_fp16 = softmax(axis = var_1293, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1435_cast_fp16 = softmax(axis = var_1293, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1436_cast_fp16 = softmax(axis = var_1293, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1437_cast_fp16 = softmax(axis = var_1293, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1438_cast_fp16 = softmax(axis = var_1293, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1439_cast_fp16 = softmax(axis = var_1293, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1440_cast_fp16 = softmax(axis = var_1293, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1441_cast_fp16 = softmax(axis = var_1293, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1442_cast_fp16 = softmax(axis = var_1293, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1443_cast_fp16 = softmax(axis = var_1293, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1444_cast_fp16 = softmax(axis = var_1293, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1444_cast_fp16")];
+            tensor<string, []> var_1446_equation_0 = const()[name = tensor<string, []>("op_1446_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1446_cast_fp16 = einsum(equation = var_1446_equation_0, values = (var_1380_cast_fp16_0, var_1429_cast_fp16))[name = tensor<string, []>("op_1446_cast_fp16")];
+            tensor<string, []> var_1448_equation_0 = const()[name = tensor<string, []>("op_1448_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1448_cast_fp16 = einsum(equation = var_1448_equation_0, values = (var_1380_cast_fp16_1, var_1430_cast_fp16))[name = tensor<string, []>("op_1448_cast_fp16")];
+            tensor<string, []> var_1450_equation_0 = const()[name = tensor<string, []>("op_1450_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1450_cast_fp16 = einsum(equation = var_1450_equation_0, values = (var_1380_cast_fp16_2, var_1431_cast_fp16))[name = tensor<string, []>("op_1450_cast_fp16")];
+            tensor<string, []> var_1452_equation_0 = const()[name = tensor<string, []>("op_1452_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1452_cast_fp16 = einsum(equation = var_1452_equation_0, values = (var_1380_cast_fp16_3, var_1432_cast_fp16))[name = tensor<string, []>("op_1452_cast_fp16")];
+            tensor<string, []> var_1454_equation_0 = const()[name = tensor<string, []>("op_1454_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1454_cast_fp16 = einsum(equation = var_1454_equation_0, values = (var_1380_cast_fp16_4, var_1433_cast_fp16))[name = tensor<string, []>("op_1454_cast_fp16")];
+            tensor<string, []> var_1456_equation_0 = const()[name = tensor<string, []>("op_1456_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1456_cast_fp16 = einsum(equation = var_1456_equation_0, values = (var_1380_cast_fp16_5, var_1434_cast_fp16))[name = tensor<string, []>("op_1456_cast_fp16")];
+            tensor<string, []> var_1458_equation_0 = const()[name = tensor<string, []>("op_1458_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1458_cast_fp16 = einsum(equation = var_1458_equation_0, values = (var_1380_cast_fp16_6, var_1435_cast_fp16))[name = tensor<string, []>("op_1458_cast_fp16")];
+            tensor<string, []> var_1460_equation_0 = const()[name = tensor<string, []>("op_1460_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1460_cast_fp16 = einsum(equation = var_1460_equation_0, values = (var_1380_cast_fp16_7, var_1436_cast_fp16))[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<string, []> var_1462_equation_0 = const()[name = tensor<string, []>("op_1462_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1462_cast_fp16 = einsum(equation = var_1462_equation_0, values = (var_1380_cast_fp16_8, var_1437_cast_fp16))[name = tensor<string, []>("op_1462_cast_fp16")];
+            tensor<string, []> var_1464_equation_0 = const()[name = tensor<string, []>("op_1464_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1464_cast_fp16 = einsum(equation = var_1464_equation_0, values = (var_1380_cast_fp16_9, var_1438_cast_fp16))[name = tensor<string, []>("op_1464_cast_fp16")];
+            tensor<string, []> var_1466_equation_0 = const()[name = tensor<string, []>("op_1466_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1466_cast_fp16 = einsum(equation = var_1466_equation_0, values = (var_1380_cast_fp16_10, var_1439_cast_fp16))[name = tensor<string, []>("op_1466_cast_fp16")];
+            tensor<string, []> var_1468_equation_0 = const()[name = tensor<string, []>("op_1468_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1468_cast_fp16 = einsum(equation = var_1468_equation_0, values = (var_1380_cast_fp16_11, var_1440_cast_fp16))[name = tensor<string, []>("op_1468_cast_fp16")];
+            tensor<string, []> var_1470_equation_0 = const()[name = tensor<string, []>("op_1470_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1470_cast_fp16 = einsum(equation = var_1470_equation_0, values = (var_1380_cast_fp16_12, var_1441_cast_fp16))[name = tensor<string, []>("op_1470_cast_fp16")];
+            tensor<string, []> var_1472_equation_0 = const()[name = tensor<string, []>("op_1472_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1472_cast_fp16 = einsum(equation = var_1472_equation_0, values = (var_1380_cast_fp16_13, var_1442_cast_fp16))[name = tensor<string, []>("op_1472_cast_fp16")];
+            tensor<string, []> var_1474_equation_0 = const()[name = tensor<string, []>("op_1474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1380_cast_fp16_14, var_1443_cast_fp16))[name = tensor<string, []>("op_1474_cast_fp16")];
+            tensor<string, []> var_1476_equation_0 = const()[name = tensor<string, []>("op_1476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1380_cast_fp16_15, var_1444_cast_fp16))[name = tensor<string, []>("op_1476_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1293, interleave = input_55_interleave_0, values = (var_1446_cast_fp16, var_1448_cast_fp16, var_1450_cast_fp16, var_1452_cast_fp16, var_1454_cast_fp16, var_1456_cast_fp16, var_1458_cast_fp16, var_1460_cast_fp16, var_1462_cast_fp16, var_1464_cast_fp16, var_1466_cast_fp16, var_1468_cast_fp16, var_1470_cast_fp16, var_1472_cast_fp16, var_1474_cast_fp16, var_1476_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1485_pad_type_0 = const()[name = tensor<string, []>("op_1485_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1485_strides_0 = const()[name = tensor<string, []>("op_1485_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1485_pad_0 = const()[name = tensor<string, []>("op_1485_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1485_dilations_0 = const()[name = tensor<string, []>("op_1485_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1485_groups_0 = const()[name = tensor<string, []>("op_1485_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142116352)))];
+            tensor<fp16, [1024]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144213568)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1485_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1485_dilations_0, groups = var_1485_groups_0, pad = var_1485_pad_0, pad_type = var_1485_pad_type_0, strides = var_1485_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1485_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1485_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144215680)))];
+            tensor<fp16, [1024]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144217792)))];
+            tensor<fp16, []> var_1495_to_fp16 = const()[name = tensor<string, []>("op_1495_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1495_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144219904)))];
+            tensor<fp16, [4096]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152608576)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1521_pad_type_0 = const()[name = tensor<string, []>("op_1521_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1521_strides_0 = const()[name = tensor<string, []>("op_1521_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1521_pad_0 = const()[name = tensor<string, []>("op_1521_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1521_dilations_0 = const()[name = tensor<string, []>("op_1521_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1521_groups_0 = const()[name = tensor<string, []>("op_1521_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152616832)))];
+            tensor<fp16, [1024]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161005504)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1521_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1521_dilations_0, groups = var_1521_groups_0, pad = var_1521_pad_0, pad_type = var_1521_pad_type_0, strides = var_1521_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1521_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1521_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1530 = const()[name = tensor<string, []>("op_1530"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161007616)))];
+            tensor<fp16, [1024]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161009728)))];
+            tensor<fp16, []> var_1546_to_fp16 = const()[name = tensor<string, []>("op_1546_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1546_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1581_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1581_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161011840)))];
+            tensor<fp16, [1024]> var_1581_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1581_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163109056)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1581_cast_fp16 = conv(bias = var_1581_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1581_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1581_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163111168)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1579_pad_type_0 = const()[name = tensor<string, []>("op_1579_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1579_strides_0 = const()[name = tensor<string, []>("op_1579_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1579_pad_0 = const()[name = tensor<string, []>("op_1579_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1579_dilations_0 = const()[name = tensor<string, []>("op_1579_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1579_groups_0 = const()[name = tensor<string, []>("op_1579_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165208384)))];
+            tensor<fp16, [1024]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167305600)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1579_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1579_dilations_0, groups = var_1579_groups_0, pad = var_1579_pad_0, pad_type = var_1579_pad_type_0, strides = var_1579_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1579_cast_fp16")];
+            tensor<int32, [16]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1582_axis_0 = const()[name = tensor<string, []>("op_1582_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_15 = split(axis = var_1582_axis_0, split_sizes = tile_18, x = var_1581_cast_fp16)[name = tensor<string, []>("op_1582_cast_fp16")];
+            tensor<int32, [4]> var_1599_perm_0 = const()[name = tensor<string, []>("op_1599_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1600_axis_0 = const()[name = tensor<string, []>("op_1600_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1599_cast_fp16 = transpose(perm = var_1599_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_18")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_15 = split(axis = var_1600_axis_0, split_sizes = tile_19, x = var_1599_cast_fp16)[name = tensor<string, []>("op_1600_cast_fp16")];
+            tensor<int32, [16]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1617_axis_0 = const()[name = tensor<string, []>("op_1617_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_15 = split(axis = var_1617_axis_0, split_sizes = tile_20, x = var_1579_cast_fp16)[name = tensor<string, []>("op_1617_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1600_cast_fp16_0, var_1582_cast_fp16_0))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1600_cast_fp16_1, var_1582_cast_fp16_1))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1600_cast_fp16_2, var_1582_cast_fp16_2))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1600_cast_fp16_3, var_1582_cast_fp16_3))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1600_cast_fp16_4, var_1582_cast_fp16_4))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1600_cast_fp16_5, var_1582_cast_fp16_5))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1600_cast_fp16_6, var_1582_cast_fp16_6))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1600_cast_fp16_7, var_1582_cast_fp16_7))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1600_cast_fp16_8, var_1582_cast_fp16_8))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1600_cast_fp16_9, var_1582_cast_fp16_9))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1600_cast_fp16_10, var_1582_cast_fp16_10))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1600_cast_fp16_11, var_1582_cast_fp16_11))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1600_cast_fp16_12, var_1582_cast_fp16_12))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1600_cast_fp16_13, var_1582_cast_fp16_13))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1600_cast_fp16_14, var_1582_cast_fp16_14))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1600_cast_fp16_15, var_1582_cast_fp16_15))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1666_cast_fp16 = softmax(axis = var_1530, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1667_cast_fp16 = softmax(axis = var_1530, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1668_cast_fp16 = softmax(axis = var_1530, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1669_cast_fp16 = softmax(axis = var_1530, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1670_cast_fp16 = softmax(axis = var_1530, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1671_cast_fp16 = softmax(axis = var_1530, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1672_cast_fp16 = softmax(axis = var_1530, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1673_cast_fp16 = softmax(axis = var_1530, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1674_cast_fp16 = softmax(axis = var_1530, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1675_cast_fp16 = softmax(axis = var_1530, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1676_cast_fp16 = softmax(axis = var_1530, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1677_cast_fp16 = softmax(axis = var_1530, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1678_cast_fp16 = softmax(axis = var_1530, x = aw_217_cast_fp16)[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1679_cast_fp16 = softmax(axis = var_1530, x = aw_219_cast_fp16)[name = tensor<string, []>("op_1679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1680_cast_fp16 = softmax(axis = var_1530, x = aw_221_cast_fp16)[name = tensor<string, []>("op_1680_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1681_cast_fp16 = softmax(axis = var_1530, x = aw_223_cast_fp16)[name = tensor<string, []>("op_1681_cast_fp16")];
+            tensor<string, []> var_1683_equation_0 = const()[name = tensor<string, []>("op_1683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1683_cast_fp16 = einsum(equation = var_1683_equation_0, values = (var_1617_cast_fp16_0, var_1666_cast_fp16))[name = tensor<string, []>("op_1683_cast_fp16")];
+            tensor<string, []> var_1685_equation_0 = const()[name = tensor<string, []>("op_1685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1685_cast_fp16 = einsum(equation = var_1685_equation_0, values = (var_1617_cast_fp16_1, var_1667_cast_fp16))[name = tensor<string, []>("op_1685_cast_fp16")];
+            tensor<string, []> var_1687_equation_0 = const()[name = tensor<string, []>("op_1687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1687_cast_fp16 = einsum(equation = var_1687_equation_0, values = (var_1617_cast_fp16_2, var_1668_cast_fp16))[name = tensor<string, []>("op_1687_cast_fp16")];
+            tensor<string, []> var_1689_equation_0 = const()[name = tensor<string, []>("op_1689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1689_cast_fp16 = einsum(equation = var_1689_equation_0, values = (var_1617_cast_fp16_3, var_1669_cast_fp16))[name = tensor<string, []>("op_1689_cast_fp16")];
+            tensor<string, []> var_1691_equation_0 = const()[name = tensor<string, []>("op_1691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1691_cast_fp16 = einsum(equation = var_1691_equation_0, values = (var_1617_cast_fp16_4, var_1670_cast_fp16))[name = tensor<string, []>("op_1691_cast_fp16")];
+            tensor<string, []> var_1693_equation_0 = const()[name = tensor<string, []>("op_1693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1693_cast_fp16 = einsum(equation = var_1693_equation_0, values = (var_1617_cast_fp16_5, var_1671_cast_fp16))[name = tensor<string, []>("op_1693_cast_fp16")];
+            tensor<string, []> var_1695_equation_0 = const()[name = tensor<string, []>("op_1695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1695_cast_fp16 = einsum(equation = var_1695_equation_0, values = (var_1617_cast_fp16_6, var_1672_cast_fp16))[name = tensor<string, []>("op_1695_cast_fp16")];
+            tensor<string, []> var_1697_equation_0 = const()[name = tensor<string, []>("op_1697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1697_cast_fp16 = einsum(equation = var_1697_equation_0, values = (var_1617_cast_fp16_7, var_1673_cast_fp16))[name = tensor<string, []>("op_1697_cast_fp16")];
+            tensor<string, []> var_1699_equation_0 = const()[name = tensor<string, []>("op_1699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1699_cast_fp16 = einsum(equation = var_1699_equation_0, values = (var_1617_cast_fp16_8, var_1674_cast_fp16))[name = tensor<string, []>("op_1699_cast_fp16")];
+            tensor<string, []> var_1701_equation_0 = const()[name = tensor<string, []>("op_1701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1701_cast_fp16 = einsum(equation = var_1701_equation_0, values = (var_1617_cast_fp16_9, var_1675_cast_fp16))[name = tensor<string, []>("op_1701_cast_fp16")];
+            tensor<string, []> var_1703_equation_0 = const()[name = tensor<string, []>("op_1703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1703_cast_fp16 = einsum(equation = var_1703_equation_0, values = (var_1617_cast_fp16_10, var_1676_cast_fp16))[name = tensor<string, []>("op_1703_cast_fp16")];
+            tensor<string, []> var_1705_equation_0 = const()[name = tensor<string, []>("op_1705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1705_cast_fp16 = einsum(equation = var_1705_equation_0, values = (var_1617_cast_fp16_11, var_1677_cast_fp16))[name = tensor<string, []>("op_1705_cast_fp16")];
+            tensor<string, []> var_1707_equation_0 = const()[name = tensor<string, []>("op_1707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1707_cast_fp16 = einsum(equation = var_1707_equation_0, values = (var_1617_cast_fp16_12, var_1678_cast_fp16))[name = tensor<string, []>("op_1707_cast_fp16")];
+            tensor<string, []> var_1709_equation_0 = const()[name = tensor<string, []>("op_1709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1709_cast_fp16 = einsum(equation = var_1709_equation_0, values = (var_1617_cast_fp16_13, var_1679_cast_fp16))[name = tensor<string, []>("op_1709_cast_fp16")];
+            tensor<string, []> var_1711_equation_0 = const()[name = tensor<string, []>("op_1711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1711_cast_fp16 = einsum(equation = var_1711_equation_0, values = (var_1617_cast_fp16_14, var_1680_cast_fp16))[name = tensor<string, []>("op_1711_cast_fp16")];
+            tensor<string, []> var_1713_equation_0 = const()[name = tensor<string, []>("op_1713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1713_cast_fp16 = einsum(equation = var_1713_equation_0, values = (var_1617_cast_fp16_15, var_1681_cast_fp16))[name = tensor<string, []>("op_1713_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1530, interleave = input_65_interleave_0, values = (var_1683_cast_fp16, var_1685_cast_fp16, var_1687_cast_fp16, var_1689_cast_fp16, var_1691_cast_fp16, var_1693_cast_fp16, var_1695_cast_fp16, var_1697_cast_fp16, var_1699_cast_fp16, var_1701_cast_fp16, var_1703_cast_fp16, var_1705_cast_fp16, var_1707_cast_fp16, var_1709_cast_fp16, var_1711_cast_fp16, var_1713_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1722_pad_type_0 = const()[name = tensor<string, []>("op_1722_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1722_strides_0 = const()[name = tensor<string, []>("op_1722_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1722_pad_0 = const()[name = tensor<string, []>("op_1722_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1722_dilations_0 = const()[name = tensor<string, []>("op_1722_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1722_groups_0 = const()[name = tensor<string, []>("op_1722_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167307712)))];
+            tensor<fp16, [1024]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169404928)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1722_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1722_dilations_0, groups = var_1722_groups_0, pad = var_1722_pad_0, pad_type = var_1722_pad_type_0, strides = var_1722_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1722_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1722_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169407040)))];
+            tensor<fp16, [1024]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169409152)))];
+            tensor<fp16, []> var_1732_to_fp16 = const()[name = tensor<string, []>("op_1732_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1732_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169411264)))];
+            tensor<fp16, [4096]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177799936)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1758_pad_type_0 = const()[name = tensor<string, []>("op_1758_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1758_strides_0 = const()[name = tensor<string, []>("op_1758_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1758_pad_0 = const()[name = tensor<string, []>("op_1758_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1758_dilations_0 = const()[name = tensor<string, []>("op_1758_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1758_groups_0 = const()[name = tensor<string, []>("op_1758_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177808192)))];
+            tensor<fp16, [1024]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186196864)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1758_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1758_dilations_0, groups = var_1758_groups_0, pad = var_1758_pad_0, pad_type = var_1758_pad_type_0, strides = var_1758_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1758_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1758_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_1767 = const()[name = tensor<string, []>("op_1767"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186198976)))];
+            tensor<fp16, [1024]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186201088)))];
+            tensor<fp16, []> var_1783_to_fp16 = const()[name = tensor<string, []>("op_1783_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_1783_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1818_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1818_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186203200)))];
+            tensor<fp16, [1024]> var_1818_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1818_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188300416)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1818_cast_fp16 = conv(bias = var_1818_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_1818_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1818_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188302528)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_1816_pad_type_0 = const()[name = tensor<string, []>("op_1816_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1816_strides_0 = const()[name = tensor<string, []>("op_1816_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1816_pad_0 = const()[name = tensor<string, []>("op_1816_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1816_dilations_0 = const()[name = tensor<string, []>("op_1816_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1816_groups_0 = const()[name = tensor<string, []>("op_1816_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(190399744)))];
+            tensor<fp16, [1024]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192496960)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1816_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_1816_dilations_0, groups = var_1816_groups_0, pad = var_1816_pad_0, pad_type = var_1816_pad_type_0, strides = var_1816_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1816_cast_fp16")];
+            tensor<int32, [16]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1819_axis_0 = const()[name = tensor<string, []>("op_1819_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_15 = split(axis = var_1819_axis_0, split_sizes = tile_21, x = var_1818_cast_fp16)[name = tensor<string, []>("op_1819_cast_fp16")];
+            tensor<int32, [4]> var_1836_perm_0 = const()[name = tensor<string, []>("op_1836_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1837_axis_0 = const()[name = tensor<string, []>("op_1837_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1836_cast_fp16 = transpose(perm = var_1836_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_17")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_15 = split(axis = var_1837_axis_0, split_sizes = tile_22, x = var_1836_cast_fp16)[name = tensor<string, []>("op_1837_cast_fp16")];
+            tensor<int32, [16]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1854_axis_0 = const()[name = tensor<string, []>("op_1854_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_15 = split(axis = var_1854_axis_0, split_sizes = tile_23, x = var_1816_cast_fp16)[name = tensor<string, []>("op_1854_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1837_cast_fp16_0, var_1819_cast_fp16_0))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1837_cast_fp16_1, var_1819_cast_fp16_1))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1837_cast_fp16_2, var_1819_cast_fp16_2))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1837_cast_fp16_3, var_1819_cast_fp16_3))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1837_cast_fp16_4, var_1819_cast_fp16_4))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1837_cast_fp16_5, var_1819_cast_fp16_5))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1837_cast_fp16_6, var_1819_cast_fp16_6))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1837_cast_fp16_7, var_1819_cast_fp16_7))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_1837_cast_fp16_8, var_1819_cast_fp16_8))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_1837_cast_fp16_9, var_1819_cast_fp16_9))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_1837_cast_fp16_10, var_1819_cast_fp16_10))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_1837_cast_fp16_11, var_1819_cast_fp16_11))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_1837_cast_fp16_12, var_1819_cast_fp16_12))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_1837_cast_fp16_13, var_1819_cast_fp16_13))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_1837_cast_fp16_14, var_1819_cast_fp16_14))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_1837_cast_fp16_15, var_1819_cast_fp16_15))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1903_cast_fp16 = softmax(axis = var_1767, x = aw_225_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1904_cast_fp16 = softmax(axis = var_1767, x = aw_227_cast_fp16)[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1905_cast_fp16 = softmax(axis = var_1767, x = aw_229_cast_fp16)[name = tensor<string, []>("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1906_cast_fp16 = softmax(axis = var_1767, x = aw_231_cast_fp16)[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1907_cast_fp16 = softmax(axis = var_1767, x = aw_233_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1908_cast_fp16 = softmax(axis = var_1767, x = aw_235_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1909_cast_fp16 = softmax(axis = var_1767, x = aw_237_cast_fp16)[name = tensor<string, []>("op_1909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1910_cast_fp16 = softmax(axis = var_1767, x = aw_239_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1911_cast_fp16 = softmax(axis = var_1767, x = aw_241_cast_fp16)[name = tensor<string, []>("op_1911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1912_cast_fp16 = softmax(axis = var_1767, x = aw_243_cast_fp16)[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1913_cast_fp16 = softmax(axis = var_1767, x = aw_245_cast_fp16)[name = tensor<string, []>("op_1913_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1914_cast_fp16 = softmax(axis = var_1767, x = aw_247_cast_fp16)[name = tensor<string, []>("op_1914_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1915_cast_fp16 = softmax(axis = var_1767, x = aw_249_cast_fp16)[name = tensor<string, []>("op_1915_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1916_cast_fp16 = softmax(axis = var_1767, x = aw_251_cast_fp16)[name = tensor<string, []>("op_1916_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1917_cast_fp16 = softmax(axis = var_1767, x = aw_253_cast_fp16)[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1918_cast_fp16 = softmax(axis = var_1767, x = aw_255_cast_fp16)[name = tensor<string, []>("op_1918_cast_fp16")];
+            tensor<string, []> var_1920_equation_0 = const()[name = tensor<string, []>("op_1920_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1920_cast_fp16 = einsum(equation = var_1920_equation_0, values = (var_1854_cast_fp16_0, var_1903_cast_fp16))[name = tensor<string, []>("op_1920_cast_fp16")];
+            tensor<string, []> var_1922_equation_0 = const()[name = tensor<string, []>("op_1922_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1922_cast_fp16 = einsum(equation = var_1922_equation_0, values = (var_1854_cast_fp16_1, var_1904_cast_fp16))[name = tensor<string, []>("op_1922_cast_fp16")];
+            tensor<string, []> var_1924_equation_0 = const()[name = tensor<string, []>("op_1924_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1924_cast_fp16 = einsum(equation = var_1924_equation_0, values = (var_1854_cast_fp16_2, var_1905_cast_fp16))[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<string, []> var_1926_equation_0 = const()[name = tensor<string, []>("op_1926_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1926_cast_fp16 = einsum(equation = var_1926_equation_0, values = (var_1854_cast_fp16_3, var_1906_cast_fp16))[name = tensor<string, []>("op_1926_cast_fp16")];
+            tensor<string, []> var_1928_equation_0 = const()[name = tensor<string, []>("op_1928_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1928_cast_fp16 = einsum(equation = var_1928_equation_0, values = (var_1854_cast_fp16_4, var_1907_cast_fp16))[name = tensor<string, []>("op_1928_cast_fp16")];
+            tensor<string, []> var_1930_equation_0 = const()[name = tensor<string, []>("op_1930_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1930_cast_fp16 = einsum(equation = var_1930_equation_0, values = (var_1854_cast_fp16_5, var_1908_cast_fp16))[name = tensor<string, []>("op_1930_cast_fp16")];
+            tensor<string, []> var_1932_equation_0 = const()[name = tensor<string, []>("op_1932_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1932_cast_fp16 = einsum(equation = var_1932_equation_0, values = (var_1854_cast_fp16_6, var_1909_cast_fp16))[name = tensor<string, []>("op_1932_cast_fp16")];
+            tensor<string, []> var_1934_equation_0 = const()[name = tensor<string, []>("op_1934_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1934_cast_fp16 = einsum(equation = var_1934_equation_0, values = (var_1854_cast_fp16_7, var_1910_cast_fp16))[name = tensor<string, []>("op_1934_cast_fp16")];
+            tensor<string, []> var_1936_equation_0 = const()[name = tensor<string, []>("op_1936_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1936_cast_fp16 = einsum(equation = var_1936_equation_0, values = (var_1854_cast_fp16_8, var_1911_cast_fp16))[name = tensor<string, []>("op_1936_cast_fp16")];
+            tensor<string, []> var_1938_equation_0 = const()[name = tensor<string, []>("op_1938_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1938_cast_fp16 = einsum(equation = var_1938_equation_0, values = (var_1854_cast_fp16_9, var_1912_cast_fp16))[name = tensor<string, []>("op_1938_cast_fp16")];
+            tensor<string, []> var_1940_equation_0 = const()[name = tensor<string, []>("op_1940_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1940_cast_fp16 = einsum(equation = var_1940_equation_0, values = (var_1854_cast_fp16_10, var_1913_cast_fp16))[name = tensor<string, []>("op_1940_cast_fp16")];
+            tensor<string, []> var_1942_equation_0 = const()[name = tensor<string, []>("op_1942_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1942_cast_fp16 = einsum(equation = var_1942_equation_0, values = (var_1854_cast_fp16_11, var_1914_cast_fp16))[name = tensor<string, []>("op_1942_cast_fp16")];
+            tensor<string, []> var_1944_equation_0 = const()[name = tensor<string, []>("op_1944_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1944_cast_fp16 = einsum(equation = var_1944_equation_0, values = (var_1854_cast_fp16_12, var_1915_cast_fp16))[name = tensor<string, []>("op_1944_cast_fp16")];
+            tensor<string, []> var_1946_equation_0 = const()[name = tensor<string, []>("op_1946_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1946_cast_fp16 = einsum(equation = var_1946_equation_0, values = (var_1854_cast_fp16_13, var_1916_cast_fp16))[name = tensor<string, []>("op_1946_cast_fp16")];
+            tensor<string, []> var_1948_equation_0 = const()[name = tensor<string, []>("op_1948_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1948_cast_fp16 = einsum(equation = var_1948_equation_0, values = (var_1854_cast_fp16_14, var_1917_cast_fp16))[name = tensor<string, []>("op_1948_cast_fp16")];
+            tensor<string, []> var_1950_equation_0 = const()[name = tensor<string, []>("op_1950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1950_cast_fp16 = einsum(equation = var_1950_equation_0, values = (var_1854_cast_fp16_15, var_1918_cast_fp16))[name = tensor<string, []>("op_1950_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_75_cast_fp16 = concat(axis = var_1767, interleave = input_75_interleave_0, values = (var_1920_cast_fp16, var_1922_cast_fp16, var_1924_cast_fp16, var_1926_cast_fp16, var_1928_cast_fp16, var_1930_cast_fp16, var_1932_cast_fp16, var_1934_cast_fp16, var_1936_cast_fp16, var_1938_cast_fp16, var_1940_cast_fp16, var_1942_cast_fp16, var_1944_cast_fp16, var_1946_cast_fp16, var_1948_cast_fp16, var_1950_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_1959_pad_type_0 = const()[name = tensor<string, []>("op_1959_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1959_strides_0 = const()[name = tensor<string, []>("op_1959_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1959_pad_0 = const()[name = tensor<string, []>("op_1959_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1959_dilations_0 = const()[name = tensor<string, []>("op_1959_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1959_groups_0 = const()[name = tensor<string, []>("op_1959_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192499072)))];
+            tensor<fp16, [1024]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194596288)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1959_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_1959_dilations_0, groups = var_1959_groups_0, pad = var_1959_pad_0, pad_type = var_1959_pad_type_0, strides = var_1959_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_1959_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_1959_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194598400)))];
+            tensor<fp16, [1024]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194600512)))];
+            tensor<fp16, []> var_1969_to_fp16 = const()[name = tensor<string, []>("op_1969_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_1969_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194602624)))];
+            tensor<fp16, [4096]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202991296)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_1995_pad_type_0 = const()[name = tensor<string, []>("op_1995_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1995_strides_0 = const()[name = tensor<string, []>("op_1995_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1995_pad_0 = const()[name = tensor<string, []>("op_1995_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1995_dilations_0 = const()[name = tensor<string, []>("op_1995_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1995_groups_0 = const()[name = tensor<string, []>("op_1995_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202999552)))];
+            tensor<fp16, [1024]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211388224)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1995_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_1995_dilations_0, groups = var_1995_groups_0, pad = var_1995_pad_0, pad_type = var_1995_pad_type_0, strides = var_1995_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_1995_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_2004 = const()[name = tensor<string, []>("op_2004"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211390336)))];
+            tensor<fp16, [1024]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211392448)))];
+            tensor<fp16, []> var_2020_to_fp16 = const()[name = tensor<string, []>("op_2020_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_2020_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2055_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2055_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211394560)))];
+            tensor<fp16, [1024]> var_2055_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2055_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213491776)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2055_cast_fp16 = conv(bias = var_2055_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_2055_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2055_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213493888)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_2053_pad_type_0 = const()[name = tensor<string, []>("op_2053_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2053_strides_0 = const()[name = tensor<string, []>("op_2053_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2053_pad_0 = const()[name = tensor<string, []>("op_2053_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2053_dilations_0 = const()[name = tensor<string, []>("op_2053_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2053_groups_0 = const()[name = tensor<string, []>("op_2053_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215591104)))];
+            tensor<fp16, [1024]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217688320)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2053_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_2053_dilations_0, groups = var_2053_groups_0, pad = var_2053_pad_0, pad_type = var_2053_pad_type_0, strides = var_2053_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2053_cast_fp16")];
+            tensor<int32, [16]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2056_axis_0 = const()[name = tensor<string, []>("op_2056_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_15 = split(axis = var_2056_axis_0, split_sizes = tile_24, x = var_2055_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<int32, [4]> var_2073_perm_0 = const()[name = tensor<string, []>("op_2073_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2074_axis_0 = const()[name = tensor<string, []>("op_2074_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2073_cast_fp16 = transpose(perm = var_2073_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_16")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_15 = split(axis = var_2074_axis_0, split_sizes = tile_25, x = var_2073_cast_fp16)[name = tensor<string, []>("op_2074_cast_fp16")];
+            tensor<int32, [16]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2091_axis_0 = const()[name = tensor<string, []>("op_2091_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_15 = split(axis = var_2091_axis_0, split_sizes = tile_26, x = var_2053_cast_fp16)[name = tensor<string, []>("op_2091_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_2074_cast_fp16_0, var_2056_cast_fp16_0))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_2074_cast_fp16_1, var_2056_cast_fp16_1))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_2074_cast_fp16_2, var_2056_cast_fp16_2))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_2074_cast_fp16_3, var_2056_cast_fp16_3))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_2074_cast_fp16_4, var_2056_cast_fp16_4))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_2074_cast_fp16_5, var_2056_cast_fp16_5))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_2074_cast_fp16_6, var_2056_cast_fp16_6))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_2074_cast_fp16_7, var_2056_cast_fp16_7))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_2074_cast_fp16_8, var_2056_cast_fp16_8))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_2074_cast_fp16_9, var_2056_cast_fp16_9))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_2074_cast_fp16_10, var_2056_cast_fp16_10))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_2074_cast_fp16_11, var_2056_cast_fp16_11))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2074_cast_fp16_12, var_2056_cast_fp16_12))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2074_cast_fp16_13, var_2056_cast_fp16_13))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2074_cast_fp16_14, var_2056_cast_fp16_14))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_287_equation_0 = const()[name = tensor<string, []>("aw_287_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_287_cast_fp16 = einsum(equation = aw_287_equation_0, values = (var_2074_cast_fp16_15, var_2056_cast_fp16_15))[name = tensor<string, []>("aw_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2140_cast_fp16 = softmax(axis = var_2004, x = aw_257_cast_fp16)[name = tensor<string, []>("op_2140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2141_cast_fp16 = softmax(axis = var_2004, x = aw_259_cast_fp16)[name = tensor<string, []>("op_2141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2142_cast_fp16 = softmax(axis = var_2004, x = aw_261_cast_fp16)[name = tensor<string, []>("op_2142_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2143_cast_fp16 = softmax(axis = var_2004, x = aw_263_cast_fp16)[name = tensor<string, []>("op_2143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2144_cast_fp16 = softmax(axis = var_2004, x = aw_265_cast_fp16)[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2145_cast_fp16 = softmax(axis = var_2004, x = aw_267_cast_fp16)[name = tensor<string, []>("op_2145_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2146_cast_fp16 = softmax(axis = var_2004, x = aw_269_cast_fp16)[name = tensor<string, []>("op_2146_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2147_cast_fp16 = softmax(axis = var_2004, x = aw_271_cast_fp16)[name = tensor<string, []>("op_2147_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2148_cast_fp16 = softmax(axis = var_2004, x = aw_273_cast_fp16)[name = tensor<string, []>("op_2148_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2149_cast_fp16 = softmax(axis = var_2004, x = aw_275_cast_fp16)[name = tensor<string, []>("op_2149_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2150_cast_fp16 = softmax(axis = var_2004, x = aw_277_cast_fp16)[name = tensor<string, []>("op_2150_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2151_cast_fp16 = softmax(axis = var_2004, x = aw_279_cast_fp16)[name = tensor<string, []>("op_2151_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2152_cast_fp16 = softmax(axis = var_2004, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2152_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2153_cast_fp16 = softmax(axis = var_2004, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2153_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2154_cast_fp16 = softmax(axis = var_2004, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2154_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2155_cast_fp16 = softmax(axis = var_2004, x = aw_287_cast_fp16)[name = tensor<string, []>("op_2155_cast_fp16")];
+            tensor<string, []> var_2157_equation_0 = const()[name = tensor<string, []>("op_2157_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2157_cast_fp16 = einsum(equation = var_2157_equation_0, values = (var_2091_cast_fp16_0, var_2140_cast_fp16))[name = tensor<string, []>("op_2157_cast_fp16")];
+            tensor<string, []> var_2159_equation_0 = const()[name = tensor<string, []>("op_2159_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2159_cast_fp16 = einsum(equation = var_2159_equation_0, values = (var_2091_cast_fp16_1, var_2141_cast_fp16))[name = tensor<string, []>("op_2159_cast_fp16")];
+            tensor<string, []> var_2161_equation_0 = const()[name = tensor<string, []>("op_2161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2161_cast_fp16 = einsum(equation = var_2161_equation_0, values = (var_2091_cast_fp16_2, var_2142_cast_fp16))[name = tensor<string, []>("op_2161_cast_fp16")];
+            tensor<string, []> var_2163_equation_0 = const()[name = tensor<string, []>("op_2163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2163_cast_fp16 = einsum(equation = var_2163_equation_0, values = (var_2091_cast_fp16_3, var_2143_cast_fp16))[name = tensor<string, []>("op_2163_cast_fp16")];
+            tensor<string, []> var_2165_equation_0 = const()[name = tensor<string, []>("op_2165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2165_cast_fp16 = einsum(equation = var_2165_equation_0, values = (var_2091_cast_fp16_4, var_2144_cast_fp16))[name = tensor<string, []>("op_2165_cast_fp16")];
+            tensor<string, []> var_2167_equation_0 = const()[name = tensor<string, []>("op_2167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2167_cast_fp16 = einsum(equation = var_2167_equation_0, values = (var_2091_cast_fp16_5, var_2145_cast_fp16))[name = tensor<string, []>("op_2167_cast_fp16")];
+            tensor<string, []> var_2169_equation_0 = const()[name = tensor<string, []>("op_2169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2169_cast_fp16 = einsum(equation = var_2169_equation_0, values = (var_2091_cast_fp16_6, var_2146_cast_fp16))[name = tensor<string, []>("op_2169_cast_fp16")];
+            tensor<string, []> var_2171_equation_0 = const()[name = tensor<string, []>("op_2171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2171_cast_fp16 = einsum(equation = var_2171_equation_0, values = (var_2091_cast_fp16_7, var_2147_cast_fp16))[name = tensor<string, []>("op_2171_cast_fp16")];
+            tensor<string, []> var_2173_equation_0 = const()[name = tensor<string, []>("op_2173_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2173_cast_fp16 = einsum(equation = var_2173_equation_0, values = (var_2091_cast_fp16_8, var_2148_cast_fp16))[name = tensor<string, []>("op_2173_cast_fp16")];
+            tensor<string, []> var_2175_equation_0 = const()[name = tensor<string, []>("op_2175_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2175_cast_fp16 = einsum(equation = var_2175_equation_0, values = (var_2091_cast_fp16_9, var_2149_cast_fp16))[name = tensor<string, []>("op_2175_cast_fp16")];
+            tensor<string, []> var_2177_equation_0 = const()[name = tensor<string, []>("op_2177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2177_cast_fp16 = einsum(equation = var_2177_equation_0, values = (var_2091_cast_fp16_10, var_2150_cast_fp16))[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<string, []> var_2179_equation_0 = const()[name = tensor<string, []>("op_2179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2179_cast_fp16 = einsum(equation = var_2179_equation_0, values = (var_2091_cast_fp16_11, var_2151_cast_fp16))[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<string, []> var_2181_equation_0 = const()[name = tensor<string, []>("op_2181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2181_cast_fp16 = einsum(equation = var_2181_equation_0, values = (var_2091_cast_fp16_12, var_2152_cast_fp16))[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<string, []> var_2183_equation_0 = const()[name = tensor<string, []>("op_2183_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2183_cast_fp16 = einsum(equation = var_2183_equation_0, values = (var_2091_cast_fp16_13, var_2153_cast_fp16))[name = tensor<string, []>("op_2183_cast_fp16")];
+            tensor<string, []> var_2185_equation_0 = const()[name = tensor<string, []>("op_2185_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2185_cast_fp16 = einsum(equation = var_2185_equation_0, values = (var_2091_cast_fp16_14, var_2154_cast_fp16))[name = tensor<string, []>("op_2185_cast_fp16")];
+            tensor<string, []> var_2187_equation_0 = const()[name = tensor<string, []>("op_2187_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2187_cast_fp16 = einsum(equation = var_2187_equation_0, values = (var_2091_cast_fp16_15, var_2155_cast_fp16))[name = tensor<string, []>("op_2187_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_85_cast_fp16 = concat(axis = var_2004, interleave = input_85_interleave_0, values = (var_2157_cast_fp16, var_2159_cast_fp16, var_2161_cast_fp16, var_2163_cast_fp16, var_2165_cast_fp16, var_2167_cast_fp16, var_2169_cast_fp16, var_2171_cast_fp16, var_2173_cast_fp16, var_2175_cast_fp16, var_2177_cast_fp16, var_2179_cast_fp16, var_2181_cast_fp16, var_2183_cast_fp16, var_2185_cast_fp16, var_2187_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_2196_pad_type_0 = const()[name = tensor<string, []>("op_2196_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2196_strides_0 = const()[name = tensor<string, []>("op_2196_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2196_pad_0 = const()[name = tensor<string, []>("op_2196_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2196_dilations_0 = const()[name = tensor<string, []>("op_2196_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2196_groups_0 = const()[name = tensor<string, []>("op_2196_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217690432)))];
+            tensor<fp16, [1024]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219787648)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2196_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_2196_dilations_0, groups = var_2196_groups_0, pad = var_2196_pad_0, pad_type = var_2196_pad_type_0, strides = var_2196_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_2196_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219789760)))];
+            tensor<fp16, [1024]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219791872)))];
+            tensor<fp16, []> var_2206_to_fp16 = const()[name = tensor<string, []>("op_2206_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_2206_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219793984)))];
+            tensor<fp16, [4096]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228182656)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_2232_pad_type_0 = const()[name = tensor<string, []>("op_2232_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2232_strides_0 = const()[name = tensor<string, []>("op_2232_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2232_pad_0 = const()[name = tensor<string, []>("op_2232_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2232_dilations_0 = const()[name = tensor<string, []>("op_2232_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2232_groups_0 = const()[name = tensor<string, []>("op_2232_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228190912)))];
+            tensor<fp16, [1024]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236579584)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2232_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_2232_dilations_0, groups = var_2232_groups_0, pad = var_2232_pad_0, pad_type = var_2232_pad_type_0, strides = var_2232_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_2232_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_2232_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_2241 = const()[name = tensor<string, []>("op_2241"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236581696)))];
+            tensor<fp16, [1024]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236583808)))];
+            tensor<fp16, []> var_2257_to_fp16 = const()[name = tensor<string, []>("op_2257_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_2257_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2292_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2292_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236585920)))];
+            tensor<fp16, [1024]> var_2292_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2292_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238683136)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2292_cast_fp16 = conv(bias = var_2292_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_2292_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2292_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238685248)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_2290_pad_type_0 = const()[name = tensor<string, []>("op_2290_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2290_strides_0 = const()[name = tensor<string, []>("op_2290_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2290_pad_0 = const()[name = tensor<string, []>("op_2290_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2290_dilations_0 = const()[name = tensor<string, []>("op_2290_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2290_groups_0 = const()[name = tensor<string, []>("op_2290_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240782464)))];
+            tensor<fp16, [1024]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242879680)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2290_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_2290_dilations_0, groups = var_2290_groups_0, pad = var_2290_pad_0, pad_type = var_2290_pad_type_0, strides = var_2290_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2290_cast_fp16")];
+            tensor<int32, [16]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2293_axis_0 = const()[name = tensor<string, []>("op_2293_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_15 = split(axis = var_2293_axis_0, split_sizes = tile_27, x = var_2292_cast_fp16)[name = tensor<string, []>("op_2293_cast_fp16")];
+            tensor<int32, [4]> var_2310_perm_0 = const()[name = tensor<string, []>("op_2310_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2311_axis_0 = const()[name = tensor<string, []>("op_2311_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2310_cast_fp16 = transpose(perm = var_2310_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_15")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_15 = split(axis = var_2311_axis_0, split_sizes = tile_28, x = var_2310_cast_fp16)[name = tensor<string, []>("op_2311_cast_fp16")];
+            tensor<int32, [16]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2328_axis_0 = const()[name = tensor<string, []>("op_2328_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_15 = split(axis = var_2328_axis_0, split_sizes = tile_29, x = var_2290_cast_fp16)[name = tensor<string, []>("op_2328_cast_fp16")];
+            tensor<string, []> aw_289_equation_0 = const()[name = tensor<string, []>("aw_289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_289_cast_fp16 = einsum(equation = aw_289_equation_0, values = (var_2311_cast_fp16_0, var_2293_cast_fp16_0))[name = tensor<string, []>("aw_289_cast_fp16")];
+            tensor<string, []> aw_291_equation_0 = const()[name = tensor<string, []>("aw_291_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_291_cast_fp16 = einsum(equation = aw_291_equation_0, values = (var_2311_cast_fp16_1, var_2293_cast_fp16_1))[name = tensor<string, []>("aw_291_cast_fp16")];
+            tensor<string, []> aw_293_equation_0 = const()[name = tensor<string, []>("aw_293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_293_cast_fp16 = einsum(equation = aw_293_equation_0, values = (var_2311_cast_fp16_2, var_2293_cast_fp16_2))[name = tensor<string, []>("aw_293_cast_fp16")];
+            tensor<string, []> aw_295_equation_0 = const()[name = tensor<string, []>("aw_295_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_295_cast_fp16 = einsum(equation = aw_295_equation_0, values = (var_2311_cast_fp16_3, var_2293_cast_fp16_3))[name = tensor<string, []>("aw_295_cast_fp16")];
+            tensor<string, []> aw_297_equation_0 = const()[name = tensor<string, []>("aw_297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_297_cast_fp16 = einsum(equation = aw_297_equation_0, values = (var_2311_cast_fp16_4, var_2293_cast_fp16_4))[name = tensor<string, []>("aw_297_cast_fp16")];
+            tensor<string, []> aw_299_equation_0 = const()[name = tensor<string, []>("aw_299_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_299_cast_fp16 = einsum(equation = aw_299_equation_0, values = (var_2311_cast_fp16_5, var_2293_cast_fp16_5))[name = tensor<string, []>("aw_299_cast_fp16")];
+            tensor<string, []> aw_301_equation_0 = const()[name = tensor<string, []>("aw_301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_301_cast_fp16 = einsum(equation = aw_301_equation_0, values = (var_2311_cast_fp16_6, var_2293_cast_fp16_6))[name = tensor<string, []>("aw_301_cast_fp16")];
+            tensor<string, []> aw_303_equation_0 = const()[name = tensor<string, []>("aw_303_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_303_cast_fp16 = einsum(equation = aw_303_equation_0, values = (var_2311_cast_fp16_7, var_2293_cast_fp16_7))[name = tensor<string, []>("aw_303_cast_fp16")];
+            tensor<string, []> aw_305_equation_0 = const()[name = tensor<string, []>("aw_305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_305_cast_fp16 = einsum(equation = aw_305_equation_0, values = (var_2311_cast_fp16_8, var_2293_cast_fp16_8))[name = tensor<string, []>("aw_305_cast_fp16")];
+            tensor<string, []> aw_307_equation_0 = const()[name = tensor<string, []>("aw_307_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_307_cast_fp16 = einsum(equation = aw_307_equation_0, values = (var_2311_cast_fp16_9, var_2293_cast_fp16_9))[name = tensor<string, []>("aw_307_cast_fp16")];
+            tensor<string, []> aw_309_equation_0 = const()[name = tensor<string, []>("aw_309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_309_cast_fp16 = einsum(equation = aw_309_equation_0, values = (var_2311_cast_fp16_10, var_2293_cast_fp16_10))[name = tensor<string, []>("aw_309_cast_fp16")];
+            tensor<string, []> aw_311_equation_0 = const()[name = tensor<string, []>("aw_311_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_311_cast_fp16 = einsum(equation = aw_311_equation_0, values = (var_2311_cast_fp16_11, var_2293_cast_fp16_11))[name = tensor<string, []>("aw_311_cast_fp16")];
+            tensor<string, []> aw_313_equation_0 = const()[name = tensor<string, []>("aw_313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_313_cast_fp16 = einsum(equation = aw_313_equation_0, values = (var_2311_cast_fp16_12, var_2293_cast_fp16_12))[name = tensor<string, []>("aw_313_cast_fp16")];
+            tensor<string, []> aw_315_equation_0 = const()[name = tensor<string, []>("aw_315_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_315_cast_fp16 = einsum(equation = aw_315_equation_0, values = (var_2311_cast_fp16_13, var_2293_cast_fp16_13))[name = tensor<string, []>("aw_315_cast_fp16")];
+            tensor<string, []> aw_317_equation_0 = const()[name = tensor<string, []>("aw_317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_317_cast_fp16 = einsum(equation = aw_317_equation_0, values = (var_2311_cast_fp16_14, var_2293_cast_fp16_14))[name = tensor<string, []>("aw_317_cast_fp16")];
+            tensor<string, []> aw_319_equation_0 = const()[name = tensor<string, []>("aw_319_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_319_cast_fp16 = einsum(equation = aw_319_equation_0, values = (var_2311_cast_fp16_15, var_2293_cast_fp16_15))[name = tensor<string, []>("aw_319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2377_cast_fp16 = softmax(axis = var_2241, x = aw_289_cast_fp16)[name = tensor<string, []>("op_2377_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2378_cast_fp16 = softmax(axis = var_2241, x = aw_291_cast_fp16)[name = tensor<string, []>("op_2378_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2379_cast_fp16 = softmax(axis = var_2241, x = aw_293_cast_fp16)[name = tensor<string, []>("op_2379_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2380_cast_fp16 = softmax(axis = var_2241, x = aw_295_cast_fp16)[name = tensor<string, []>("op_2380_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2381_cast_fp16 = softmax(axis = var_2241, x = aw_297_cast_fp16)[name = tensor<string, []>("op_2381_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2382_cast_fp16 = softmax(axis = var_2241, x = aw_299_cast_fp16)[name = tensor<string, []>("op_2382_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2383_cast_fp16 = softmax(axis = var_2241, x = aw_301_cast_fp16)[name = tensor<string, []>("op_2383_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2384_cast_fp16 = softmax(axis = var_2241, x = aw_303_cast_fp16)[name = tensor<string, []>("op_2384_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2385_cast_fp16 = softmax(axis = var_2241, x = aw_305_cast_fp16)[name = tensor<string, []>("op_2385_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2386_cast_fp16 = softmax(axis = var_2241, x = aw_307_cast_fp16)[name = tensor<string, []>("op_2386_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2387_cast_fp16 = softmax(axis = var_2241, x = aw_309_cast_fp16)[name = tensor<string, []>("op_2387_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2388_cast_fp16 = softmax(axis = var_2241, x = aw_311_cast_fp16)[name = tensor<string, []>("op_2388_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2389_cast_fp16 = softmax(axis = var_2241, x = aw_313_cast_fp16)[name = tensor<string, []>("op_2389_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2390_cast_fp16 = softmax(axis = var_2241, x = aw_315_cast_fp16)[name = tensor<string, []>("op_2390_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2391_cast_fp16 = softmax(axis = var_2241, x = aw_317_cast_fp16)[name = tensor<string, []>("op_2391_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2392_cast_fp16 = softmax(axis = var_2241, x = aw_319_cast_fp16)[name = tensor<string, []>("op_2392_cast_fp16")];
+            tensor<string, []> var_2394_equation_0 = const()[name = tensor<string, []>("op_2394_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2394_cast_fp16 = einsum(equation = var_2394_equation_0, values = (var_2328_cast_fp16_0, var_2377_cast_fp16))[name = tensor<string, []>("op_2394_cast_fp16")];
+            tensor<string, []> var_2396_equation_0 = const()[name = tensor<string, []>("op_2396_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2396_cast_fp16 = einsum(equation = var_2396_equation_0, values = (var_2328_cast_fp16_1, var_2378_cast_fp16))[name = tensor<string, []>("op_2396_cast_fp16")];
+            tensor<string, []> var_2398_equation_0 = const()[name = tensor<string, []>("op_2398_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2398_cast_fp16 = einsum(equation = var_2398_equation_0, values = (var_2328_cast_fp16_2, var_2379_cast_fp16))[name = tensor<string, []>("op_2398_cast_fp16")];
+            tensor<string, []> var_2400_equation_0 = const()[name = tensor<string, []>("op_2400_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2400_cast_fp16 = einsum(equation = var_2400_equation_0, values = (var_2328_cast_fp16_3, var_2380_cast_fp16))[name = tensor<string, []>("op_2400_cast_fp16")];
+            tensor<string, []> var_2402_equation_0 = const()[name = tensor<string, []>("op_2402_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2402_cast_fp16 = einsum(equation = var_2402_equation_0, values = (var_2328_cast_fp16_4, var_2381_cast_fp16))[name = tensor<string, []>("op_2402_cast_fp16")];
+            tensor<string, []> var_2404_equation_0 = const()[name = tensor<string, []>("op_2404_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2404_cast_fp16 = einsum(equation = var_2404_equation_0, values = (var_2328_cast_fp16_5, var_2382_cast_fp16))[name = tensor<string, []>("op_2404_cast_fp16")];
+            tensor<string, []> var_2406_equation_0 = const()[name = tensor<string, []>("op_2406_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2406_cast_fp16 = einsum(equation = var_2406_equation_0, values = (var_2328_cast_fp16_6, var_2383_cast_fp16))[name = tensor<string, []>("op_2406_cast_fp16")];
+            tensor<string, []> var_2408_equation_0 = const()[name = tensor<string, []>("op_2408_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2408_cast_fp16 = einsum(equation = var_2408_equation_0, values = (var_2328_cast_fp16_7, var_2384_cast_fp16))[name = tensor<string, []>("op_2408_cast_fp16")];
+            tensor<string, []> var_2410_equation_0 = const()[name = tensor<string, []>("op_2410_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2410_cast_fp16 = einsum(equation = var_2410_equation_0, values = (var_2328_cast_fp16_8, var_2385_cast_fp16))[name = tensor<string, []>("op_2410_cast_fp16")];
+            tensor<string, []> var_2412_equation_0 = const()[name = tensor<string, []>("op_2412_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2412_cast_fp16 = einsum(equation = var_2412_equation_0, values = (var_2328_cast_fp16_9, var_2386_cast_fp16))[name = tensor<string, []>("op_2412_cast_fp16")];
+            tensor<string, []> var_2414_equation_0 = const()[name = tensor<string, []>("op_2414_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2414_cast_fp16 = einsum(equation = var_2414_equation_0, values = (var_2328_cast_fp16_10, var_2387_cast_fp16))[name = tensor<string, []>("op_2414_cast_fp16")];
+            tensor<string, []> var_2416_equation_0 = const()[name = tensor<string, []>("op_2416_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2416_cast_fp16 = einsum(equation = var_2416_equation_0, values = (var_2328_cast_fp16_11, var_2388_cast_fp16))[name = tensor<string, []>("op_2416_cast_fp16")];
+            tensor<string, []> var_2418_equation_0 = const()[name = tensor<string, []>("op_2418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2328_cast_fp16_12, var_2389_cast_fp16))[name = tensor<string, []>("op_2418_cast_fp16")];
+            tensor<string, []> var_2420_equation_0 = const()[name = tensor<string, []>("op_2420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2420_cast_fp16 = einsum(equation = var_2420_equation_0, values = (var_2328_cast_fp16_13, var_2390_cast_fp16))[name = tensor<string, []>("op_2420_cast_fp16")];
+            tensor<string, []> var_2422_equation_0 = const()[name = tensor<string, []>("op_2422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2328_cast_fp16_14, var_2391_cast_fp16))[name = tensor<string, []>("op_2422_cast_fp16")];
+            tensor<string, []> var_2424_equation_0 = const()[name = tensor<string, []>("op_2424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2424_cast_fp16 = einsum(equation = var_2424_equation_0, values = (var_2328_cast_fp16_15, var_2392_cast_fp16))[name = tensor<string, []>("op_2424_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_95_cast_fp16 = concat(axis = var_2241, interleave = input_95_interleave_0, values = (var_2394_cast_fp16, var_2396_cast_fp16, var_2398_cast_fp16, var_2400_cast_fp16, var_2402_cast_fp16, var_2404_cast_fp16, var_2406_cast_fp16, var_2408_cast_fp16, var_2410_cast_fp16, var_2412_cast_fp16, var_2414_cast_fp16, var_2416_cast_fp16, var_2418_cast_fp16, var_2420_cast_fp16, var_2422_cast_fp16, var_2424_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2433_pad_type_0 = const()[name = tensor<string, []>("op_2433_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2433_strides_0 = const()[name = tensor<string, []>("op_2433_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2433_pad_0 = const()[name = tensor<string, []>("op_2433_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2433_dilations_0 = const()[name = tensor<string, []>("op_2433_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2433_groups_0 = const()[name = tensor<string, []>("op_2433_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242881792)))];
+            tensor<fp16, [1024]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244979008)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2433_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2433_dilations_0, groups = var_2433_groups_0, pad = var_2433_pad_0, pad_type = var_2433_pad_type_0, strides = var_2433_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2433_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2433_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244981120)))];
+            tensor<fp16, [1024]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244983232)))];
+            tensor<fp16, []> var_2443_to_fp16 = const()[name = tensor<string, []>("op_2443_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2443_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244985344)))];
+            tensor<fp16, [4096]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253374016)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2469_pad_type_0 = const()[name = tensor<string, []>("op_2469_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2469_strides_0 = const()[name = tensor<string, []>("op_2469_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2469_pad_0 = const()[name = tensor<string, []>("op_2469_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2469_dilations_0 = const()[name = tensor<string, []>("op_2469_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2469_groups_0 = const()[name = tensor<string, []>("op_2469_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253382272)))];
+            tensor<fp16, [1024]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261770944)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2469_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2469_dilations_0, groups = var_2469_groups_0, pad = var_2469_pad_0, pad_type = var_2469_pad_type_0, strides = var_2469_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2469_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2469_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2478 = const()[name = tensor<string, []>("op_2478"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261773056)))];
+            tensor<fp16, [1024]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261775168)))];
+            tensor<fp16, []> var_2494_to_fp16 = const()[name = tensor<string, []>("op_2494_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2494_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2529_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2529_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261777280)))];
+            tensor<fp16, [1024]> var_2529_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2529_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263874496)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2529_cast_fp16 = conv(bias = var_2529_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2529_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2529_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263876608)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2527_pad_type_0 = const()[name = tensor<string, []>("op_2527_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2527_strides_0 = const()[name = tensor<string, []>("op_2527_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2527_pad_0 = const()[name = tensor<string, []>("op_2527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2527_dilations_0 = const()[name = tensor<string, []>("op_2527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2527_groups_0 = const()[name = tensor<string, []>("op_2527_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(265973824)))];
+            tensor<fp16, [1024]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268071040)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2527_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2527_cast_fp16")];
+            tensor<int32, [16]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2530_axis_0 = const()[name = tensor<string, []>("op_2530_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_15 = split(axis = var_2530_axis_0, split_sizes = tile_30, x = var_2529_cast_fp16)[name = tensor<string, []>("op_2530_cast_fp16")];
+            tensor<int32, [4]> var_2547_perm_0 = const()[name = tensor<string, []>("op_2547_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2548_axis_0 = const()[name = tensor<string, []>("op_2548_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2547_cast_fp16 = transpose(perm = var_2547_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_14")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_15 = split(axis = var_2548_axis_0, split_sizes = tile_31, x = var_2547_cast_fp16)[name = tensor<string, []>("op_2548_cast_fp16")];
+            tensor<int32, [16]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2565_axis_0 = const()[name = tensor<string, []>("op_2565_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_15 = split(axis = var_2565_axis_0, split_sizes = tile_32, x = var_2527_cast_fp16)[name = tensor<string, []>("op_2565_cast_fp16")];
+            tensor<string, []> aw_321_equation_0 = const()[name = tensor<string, []>("aw_321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_321_cast_fp16 = einsum(equation = aw_321_equation_0, values = (var_2548_cast_fp16_0, var_2530_cast_fp16_0))[name = tensor<string, []>("aw_321_cast_fp16")];
+            tensor<string, []> aw_323_equation_0 = const()[name = tensor<string, []>("aw_323_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_323_cast_fp16 = einsum(equation = aw_323_equation_0, values = (var_2548_cast_fp16_1, var_2530_cast_fp16_1))[name = tensor<string, []>("aw_323_cast_fp16")];
+            tensor<string, []> aw_325_equation_0 = const()[name = tensor<string, []>("aw_325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_325_cast_fp16 = einsum(equation = aw_325_equation_0, values = (var_2548_cast_fp16_2, var_2530_cast_fp16_2))[name = tensor<string, []>("aw_325_cast_fp16")];
+            tensor<string, []> aw_327_equation_0 = const()[name = tensor<string, []>("aw_327_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_327_cast_fp16 = einsum(equation = aw_327_equation_0, values = (var_2548_cast_fp16_3, var_2530_cast_fp16_3))[name = tensor<string, []>("aw_327_cast_fp16")];
+            tensor<string, []> aw_329_equation_0 = const()[name = tensor<string, []>("aw_329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_329_cast_fp16 = einsum(equation = aw_329_equation_0, values = (var_2548_cast_fp16_4, var_2530_cast_fp16_4))[name = tensor<string, []>("aw_329_cast_fp16")];
+            tensor<string, []> aw_331_equation_0 = const()[name = tensor<string, []>("aw_331_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_331_cast_fp16 = einsum(equation = aw_331_equation_0, values = (var_2548_cast_fp16_5, var_2530_cast_fp16_5))[name = tensor<string, []>("aw_331_cast_fp16")];
+            tensor<string, []> aw_333_equation_0 = const()[name = tensor<string, []>("aw_333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_333_cast_fp16 = einsum(equation = aw_333_equation_0, values = (var_2548_cast_fp16_6, var_2530_cast_fp16_6))[name = tensor<string, []>("aw_333_cast_fp16")];
+            tensor<string, []> aw_335_equation_0 = const()[name = tensor<string, []>("aw_335_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_335_cast_fp16 = einsum(equation = aw_335_equation_0, values = (var_2548_cast_fp16_7, var_2530_cast_fp16_7))[name = tensor<string, []>("aw_335_cast_fp16")];
+            tensor<string, []> aw_337_equation_0 = const()[name = tensor<string, []>("aw_337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_337_cast_fp16 = einsum(equation = aw_337_equation_0, values = (var_2548_cast_fp16_8, var_2530_cast_fp16_8))[name = tensor<string, []>("aw_337_cast_fp16")];
+            tensor<string, []> aw_339_equation_0 = const()[name = tensor<string, []>("aw_339_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_339_cast_fp16 = einsum(equation = aw_339_equation_0, values = (var_2548_cast_fp16_9, var_2530_cast_fp16_9))[name = tensor<string, []>("aw_339_cast_fp16")];
+            tensor<string, []> aw_341_equation_0 = const()[name = tensor<string, []>("aw_341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_341_cast_fp16 = einsum(equation = aw_341_equation_0, values = (var_2548_cast_fp16_10, var_2530_cast_fp16_10))[name = tensor<string, []>("aw_341_cast_fp16")];
+            tensor<string, []> aw_343_equation_0 = const()[name = tensor<string, []>("aw_343_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_343_cast_fp16 = einsum(equation = aw_343_equation_0, values = (var_2548_cast_fp16_11, var_2530_cast_fp16_11))[name = tensor<string, []>("aw_343_cast_fp16")];
+            tensor<string, []> aw_345_equation_0 = const()[name = tensor<string, []>("aw_345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_345_cast_fp16 = einsum(equation = aw_345_equation_0, values = (var_2548_cast_fp16_12, var_2530_cast_fp16_12))[name = tensor<string, []>("aw_345_cast_fp16")];
+            tensor<string, []> aw_347_equation_0 = const()[name = tensor<string, []>("aw_347_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_347_cast_fp16 = einsum(equation = aw_347_equation_0, values = (var_2548_cast_fp16_13, var_2530_cast_fp16_13))[name = tensor<string, []>("aw_347_cast_fp16")];
+            tensor<string, []> aw_349_equation_0 = const()[name = tensor<string, []>("aw_349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_349_cast_fp16 = einsum(equation = aw_349_equation_0, values = (var_2548_cast_fp16_14, var_2530_cast_fp16_14))[name = tensor<string, []>("aw_349_cast_fp16")];
+            tensor<string, []> aw_351_equation_0 = const()[name = tensor<string, []>("aw_351_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_351_cast_fp16 = einsum(equation = aw_351_equation_0, values = (var_2548_cast_fp16_15, var_2530_cast_fp16_15))[name = tensor<string, []>("aw_351_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2614_cast_fp16 = softmax(axis = var_2478, x = aw_321_cast_fp16)[name = tensor<string, []>("op_2614_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2615_cast_fp16 = softmax(axis = var_2478, x = aw_323_cast_fp16)[name = tensor<string, []>("op_2615_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2616_cast_fp16 = softmax(axis = var_2478, x = aw_325_cast_fp16)[name = tensor<string, []>("op_2616_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2617_cast_fp16 = softmax(axis = var_2478, x = aw_327_cast_fp16)[name = tensor<string, []>("op_2617_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2618_cast_fp16 = softmax(axis = var_2478, x = aw_329_cast_fp16)[name = tensor<string, []>("op_2618_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2619_cast_fp16 = softmax(axis = var_2478, x = aw_331_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2620_cast_fp16 = softmax(axis = var_2478, x = aw_333_cast_fp16)[name = tensor<string, []>("op_2620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2621_cast_fp16 = softmax(axis = var_2478, x = aw_335_cast_fp16)[name = tensor<string, []>("op_2621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2622_cast_fp16 = softmax(axis = var_2478, x = aw_337_cast_fp16)[name = tensor<string, []>("op_2622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2623_cast_fp16 = softmax(axis = var_2478, x = aw_339_cast_fp16)[name = tensor<string, []>("op_2623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2624_cast_fp16 = softmax(axis = var_2478, x = aw_341_cast_fp16)[name = tensor<string, []>("op_2624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2625_cast_fp16 = softmax(axis = var_2478, x = aw_343_cast_fp16)[name = tensor<string, []>("op_2625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2626_cast_fp16 = softmax(axis = var_2478, x = aw_345_cast_fp16)[name = tensor<string, []>("op_2626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2627_cast_fp16 = softmax(axis = var_2478, x = aw_347_cast_fp16)[name = tensor<string, []>("op_2627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2628_cast_fp16 = softmax(axis = var_2478, x = aw_349_cast_fp16)[name = tensor<string, []>("op_2628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2629_cast_fp16 = softmax(axis = var_2478, x = aw_351_cast_fp16)[name = tensor<string, []>("op_2629_cast_fp16")];
+            tensor<string, []> var_2631_equation_0 = const()[name = tensor<string, []>("op_2631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2631_cast_fp16 = einsum(equation = var_2631_equation_0, values = (var_2565_cast_fp16_0, var_2614_cast_fp16))[name = tensor<string, []>("op_2631_cast_fp16")];
+            tensor<string, []> var_2633_equation_0 = const()[name = tensor<string, []>("op_2633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2633_cast_fp16 = einsum(equation = var_2633_equation_0, values = (var_2565_cast_fp16_1, var_2615_cast_fp16))[name = tensor<string, []>("op_2633_cast_fp16")];
+            tensor<string, []> var_2635_equation_0 = const()[name = tensor<string, []>("op_2635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2635_cast_fp16 = einsum(equation = var_2635_equation_0, values = (var_2565_cast_fp16_2, var_2616_cast_fp16))[name = tensor<string, []>("op_2635_cast_fp16")];
+            tensor<string, []> var_2637_equation_0 = const()[name = tensor<string, []>("op_2637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2637_cast_fp16 = einsum(equation = var_2637_equation_0, values = (var_2565_cast_fp16_3, var_2617_cast_fp16))[name = tensor<string, []>("op_2637_cast_fp16")];
+            tensor<string, []> var_2639_equation_0 = const()[name = tensor<string, []>("op_2639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2639_cast_fp16 = einsum(equation = var_2639_equation_0, values = (var_2565_cast_fp16_4, var_2618_cast_fp16))[name = tensor<string, []>("op_2639_cast_fp16")];
+            tensor<string, []> var_2641_equation_0 = const()[name = tensor<string, []>("op_2641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2641_cast_fp16 = einsum(equation = var_2641_equation_0, values = (var_2565_cast_fp16_5, var_2619_cast_fp16))[name = tensor<string, []>("op_2641_cast_fp16")];
+            tensor<string, []> var_2643_equation_0 = const()[name = tensor<string, []>("op_2643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2643_cast_fp16 = einsum(equation = var_2643_equation_0, values = (var_2565_cast_fp16_6, var_2620_cast_fp16))[name = tensor<string, []>("op_2643_cast_fp16")];
+            tensor<string, []> var_2645_equation_0 = const()[name = tensor<string, []>("op_2645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2645_cast_fp16 = einsum(equation = var_2645_equation_0, values = (var_2565_cast_fp16_7, var_2621_cast_fp16))[name = tensor<string, []>("op_2645_cast_fp16")];
+            tensor<string, []> var_2647_equation_0 = const()[name = tensor<string, []>("op_2647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2647_cast_fp16 = einsum(equation = var_2647_equation_0, values = (var_2565_cast_fp16_8, var_2622_cast_fp16))[name = tensor<string, []>("op_2647_cast_fp16")];
+            tensor<string, []> var_2649_equation_0 = const()[name = tensor<string, []>("op_2649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2649_cast_fp16 = einsum(equation = var_2649_equation_0, values = (var_2565_cast_fp16_9, var_2623_cast_fp16))[name = tensor<string, []>("op_2649_cast_fp16")];
+            tensor<string, []> var_2651_equation_0 = const()[name = tensor<string, []>("op_2651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2651_cast_fp16 = einsum(equation = var_2651_equation_0, values = (var_2565_cast_fp16_10, var_2624_cast_fp16))[name = tensor<string, []>("op_2651_cast_fp16")];
+            tensor<string, []> var_2653_equation_0 = const()[name = tensor<string, []>("op_2653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2653_cast_fp16 = einsum(equation = var_2653_equation_0, values = (var_2565_cast_fp16_11, var_2625_cast_fp16))[name = tensor<string, []>("op_2653_cast_fp16")];
+            tensor<string, []> var_2655_equation_0 = const()[name = tensor<string, []>("op_2655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2655_cast_fp16 = einsum(equation = var_2655_equation_0, values = (var_2565_cast_fp16_12, var_2626_cast_fp16))[name = tensor<string, []>("op_2655_cast_fp16")];
+            tensor<string, []> var_2657_equation_0 = const()[name = tensor<string, []>("op_2657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2657_cast_fp16 = einsum(equation = var_2657_equation_0, values = (var_2565_cast_fp16_13, var_2627_cast_fp16))[name = tensor<string, []>("op_2657_cast_fp16")];
+            tensor<string, []> var_2659_equation_0 = const()[name = tensor<string, []>("op_2659_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2659_cast_fp16 = einsum(equation = var_2659_equation_0, values = (var_2565_cast_fp16_14, var_2628_cast_fp16))[name = tensor<string, []>("op_2659_cast_fp16")];
+            tensor<string, []> var_2661_equation_0 = const()[name = tensor<string, []>("op_2661_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2661_cast_fp16 = einsum(equation = var_2661_equation_0, values = (var_2565_cast_fp16_15, var_2629_cast_fp16))[name = tensor<string, []>("op_2661_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2478, interleave = input_105_interleave_0, values = (var_2631_cast_fp16, var_2633_cast_fp16, var_2635_cast_fp16, var_2637_cast_fp16, var_2639_cast_fp16, var_2641_cast_fp16, var_2643_cast_fp16, var_2645_cast_fp16, var_2647_cast_fp16, var_2649_cast_fp16, var_2651_cast_fp16, var_2653_cast_fp16, var_2655_cast_fp16, var_2657_cast_fp16, var_2659_cast_fp16, var_2661_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_2670_pad_type_0 = const()[name = tensor<string, []>("op_2670_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2670_strides_0 = const()[name = tensor<string, []>("op_2670_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2670_pad_0 = const()[name = tensor<string, []>("op_2670_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2670_dilations_0 = const()[name = tensor<string, []>("op_2670_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2670_groups_0 = const()[name = tensor<string, []>("op_2670_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268073152)))];
+            tensor<fp16, [1024]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270170368)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2670_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_2670_dilations_0, groups = var_2670_groups_0, pad = var_2670_pad_0, pad_type = var_2670_pad_type_0, strides = var_2670_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_2670_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_2670_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270172480)))];
+            tensor<fp16, [1024]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270174592)))];
+            tensor<fp16, []> var_2680_to_fp16 = const()[name = tensor<string, []>("op_2680_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_2680_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270176704)))];
+            tensor<fp16, [4096]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278565376)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_2706_pad_type_0 = const()[name = tensor<string, []>("op_2706_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2706_strides_0 = const()[name = tensor<string, []>("op_2706_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2706_pad_0 = const()[name = tensor<string, []>("op_2706_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2706_dilations_0 = const()[name = tensor<string, []>("op_2706_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2706_groups_0 = const()[name = tensor<string, []>("op_2706_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278573632)))];
+            tensor<fp16, [1024]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286962304)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2706_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_2706_dilations_0, groups = var_2706_groups_0, pad = var_2706_pad_0, pad_type = var_2706_pad_type_0, strides = var_2706_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_2706_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_2715 = const()[name = tensor<string, []>("op_2715"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286964416)))];
+            tensor<fp16, [1024]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286966528)))];
+            tensor<fp16, []> var_2731_to_fp16 = const()[name = tensor<string, []>("op_2731_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_2731_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_23_pad_type_0 = const()[name = tensor<string, []>("q_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_23_strides_0 = const()[name = tensor<string, []>("q_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_23_pad_0 = const()[name = tensor<string, []>("q_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_23_dilations_0 = const()[name = tensor<string, []>("q_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_23_groups_0 = const()[name = tensor<string, []>("q_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2766_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2766_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286968640)))];
+            tensor<fp16, [1024]> var_2766_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2766_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289065856)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2766_cast_fp16 = conv(bias = var_2766_bias_0_to_fp16, dilations = q_23_dilations_0, groups = q_23_groups_0, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = q_23_strides_0, weight = var_2766_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2766_cast_fp16")];
+            tensor<string, []> k_23_pad_type_0 = const()[name = tensor<string, []>("k_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_23_strides_0 = const()[name = tensor<string, []>("k_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_23_pad_0 = const()[name = tensor<string, []>("k_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_23_dilations_0 = const()[name = tensor<string, []>("k_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_23_groups_0 = const()[name = tensor<string, []>("k_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289067968)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_23_cast_fp16 = conv(dilations = k_23_dilations_0, groups = k_23_groups_0, pad = k_23_pad_0, pad_type = k_23_pad_type_0, strides = k_23_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_23_cast_fp16")];
+            tensor<string, []> var_2764_pad_type_0 = const()[name = tensor<string, []>("op_2764_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2764_strides_0 = const()[name = tensor<string, []>("op_2764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = tensor<string, []>("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2764_dilations_0 = const()[name = tensor<string, []>("op_2764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2764_groups_0 = const()[name = tensor<string, []>("op_2764_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(291165184)))];
+            tensor<fp16, [1024]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293262400)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2764_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2764_cast_fp16")];
+            tensor<int32, [16]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2767_axis_0 = const()[name = tensor<string, []>("op_2767_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_15 = split(axis = var_2767_axis_0, split_sizes = tile_33, x = var_2766_cast_fp16)[name = tensor<string, []>("op_2767_cast_fp16")];
+            tensor<int32, [4]> var_2784_perm_0 = const()[name = tensor<string, []>("op_2784_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2785_axis_0 = const()[name = tensor<string, []>("op_2785_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2784_cast_fp16 = transpose(perm = var_2784_perm_0, x = k_23_cast_fp16)[name = tensor<string, []>("transpose_13")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_15 = split(axis = var_2785_axis_0, split_sizes = tile_34, x = var_2784_cast_fp16)[name = tensor<string, []>("op_2785_cast_fp16")];
+            tensor<int32, [16]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2802_axis_0 = const()[name = tensor<string, []>("op_2802_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_15 = split(axis = var_2802_axis_0, split_sizes = tile_35, x = var_2764_cast_fp16)[name = tensor<string, []>("op_2802_cast_fp16")];
+            tensor<string, []> aw_353_equation_0 = const()[name = tensor<string, []>("aw_353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_353_cast_fp16 = einsum(equation = aw_353_equation_0, values = (var_2785_cast_fp16_0, var_2767_cast_fp16_0))[name = tensor<string, []>("aw_353_cast_fp16")];
+            tensor<string, []> aw_355_equation_0 = const()[name = tensor<string, []>("aw_355_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_355_cast_fp16 = einsum(equation = aw_355_equation_0, values = (var_2785_cast_fp16_1, var_2767_cast_fp16_1))[name = tensor<string, []>("aw_355_cast_fp16")];
+            tensor<string, []> aw_357_equation_0 = const()[name = tensor<string, []>("aw_357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_357_cast_fp16 = einsum(equation = aw_357_equation_0, values = (var_2785_cast_fp16_2, var_2767_cast_fp16_2))[name = tensor<string, []>("aw_357_cast_fp16")];
+            tensor<string, []> aw_359_equation_0 = const()[name = tensor<string, []>("aw_359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_359_cast_fp16 = einsum(equation = aw_359_equation_0, values = (var_2785_cast_fp16_3, var_2767_cast_fp16_3))[name = tensor<string, []>("aw_359_cast_fp16")];
+            tensor<string, []> aw_361_equation_0 = const()[name = tensor<string, []>("aw_361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_361_cast_fp16 = einsum(equation = aw_361_equation_0, values = (var_2785_cast_fp16_4, var_2767_cast_fp16_4))[name = tensor<string, []>("aw_361_cast_fp16")];
+            tensor<string, []> aw_363_equation_0 = const()[name = tensor<string, []>("aw_363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_363_cast_fp16 = einsum(equation = aw_363_equation_0, values = (var_2785_cast_fp16_5, var_2767_cast_fp16_5))[name = tensor<string, []>("aw_363_cast_fp16")];
+            tensor<string, []> aw_365_equation_0 = const()[name = tensor<string, []>("aw_365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_365_cast_fp16 = einsum(equation = aw_365_equation_0, values = (var_2785_cast_fp16_6, var_2767_cast_fp16_6))[name = tensor<string, []>("aw_365_cast_fp16")];
+            tensor<string, []> aw_367_equation_0 = const()[name = tensor<string, []>("aw_367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_367_cast_fp16 = einsum(equation = aw_367_equation_0, values = (var_2785_cast_fp16_7, var_2767_cast_fp16_7))[name = tensor<string, []>("aw_367_cast_fp16")];
+            tensor<string, []> aw_369_equation_0 = const()[name = tensor<string, []>("aw_369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_369_cast_fp16 = einsum(equation = aw_369_equation_0, values = (var_2785_cast_fp16_8, var_2767_cast_fp16_8))[name = tensor<string, []>("aw_369_cast_fp16")];
+            tensor<string, []> aw_371_equation_0 = const()[name = tensor<string, []>("aw_371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_371_cast_fp16 = einsum(equation = aw_371_equation_0, values = (var_2785_cast_fp16_9, var_2767_cast_fp16_9))[name = tensor<string, []>("aw_371_cast_fp16")];
+            tensor<string, []> aw_373_equation_0 = const()[name = tensor<string, []>("aw_373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_373_cast_fp16 = einsum(equation = aw_373_equation_0, values = (var_2785_cast_fp16_10, var_2767_cast_fp16_10))[name = tensor<string, []>("aw_373_cast_fp16")];
+            tensor<string, []> aw_375_equation_0 = const()[name = tensor<string, []>("aw_375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_375_cast_fp16 = einsum(equation = aw_375_equation_0, values = (var_2785_cast_fp16_11, var_2767_cast_fp16_11))[name = tensor<string, []>("aw_375_cast_fp16")];
+            tensor<string, []> aw_377_equation_0 = const()[name = tensor<string, []>("aw_377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_377_cast_fp16 = einsum(equation = aw_377_equation_0, values = (var_2785_cast_fp16_12, var_2767_cast_fp16_12))[name = tensor<string, []>("aw_377_cast_fp16")];
+            tensor<string, []> aw_379_equation_0 = const()[name = tensor<string, []>("aw_379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_379_cast_fp16 = einsum(equation = aw_379_equation_0, values = (var_2785_cast_fp16_13, var_2767_cast_fp16_13))[name = tensor<string, []>("aw_379_cast_fp16")];
+            tensor<string, []> aw_381_equation_0 = const()[name = tensor<string, []>("aw_381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_381_cast_fp16 = einsum(equation = aw_381_equation_0, values = (var_2785_cast_fp16_14, var_2767_cast_fp16_14))[name = tensor<string, []>("aw_381_cast_fp16")];
+            tensor<string, []> aw_383_equation_0 = const()[name = tensor<string, []>("aw_383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_383_cast_fp16 = einsum(equation = aw_383_equation_0, values = (var_2785_cast_fp16_15, var_2767_cast_fp16_15))[name = tensor<string, []>("aw_383_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2851_cast_fp16 = softmax(axis = var_2715, x = aw_353_cast_fp16)[name = tensor<string, []>("op_2851_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2852_cast_fp16 = softmax(axis = var_2715, x = aw_355_cast_fp16)[name = tensor<string, []>("op_2852_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2853_cast_fp16 = softmax(axis = var_2715, x = aw_357_cast_fp16)[name = tensor<string, []>("op_2853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2854_cast_fp16 = softmax(axis = var_2715, x = aw_359_cast_fp16)[name = tensor<string, []>("op_2854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2855_cast_fp16 = softmax(axis = var_2715, x = aw_361_cast_fp16)[name = tensor<string, []>("op_2855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2856_cast_fp16 = softmax(axis = var_2715, x = aw_363_cast_fp16)[name = tensor<string, []>("op_2856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2857_cast_fp16 = softmax(axis = var_2715, x = aw_365_cast_fp16)[name = tensor<string, []>("op_2857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2858_cast_fp16 = softmax(axis = var_2715, x = aw_367_cast_fp16)[name = tensor<string, []>("op_2858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2859_cast_fp16 = softmax(axis = var_2715, x = aw_369_cast_fp16)[name = tensor<string, []>("op_2859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2860_cast_fp16 = softmax(axis = var_2715, x = aw_371_cast_fp16)[name = tensor<string, []>("op_2860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2861_cast_fp16 = softmax(axis = var_2715, x = aw_373_cast_fp16)[name = tensor<string, []>("op_2861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2862_cast_fp16 = softmax(axis = var_2715, x = aw_375_cast_fp16)[name = tensor<string, []>("op_2862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2863_cast_fp16 = softmax(axis = var_2715, x = aw_377_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2864_cast_fp16 = softmax(axis = var_2715, x = aw_379_cast_fp16)[name = tensor<string, []>("op_2864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2865_cast_fp16 = softmax(axis = var_2715, x = aw_381_cast_fp16)[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2866_cast_fp16 = softmax(axis = var_2715, x = aw_383_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<string, []> var_2868_equation_0 = const()[name = tensor<string, []>("op_2868_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2868_cast_fp16 = einsum(equation = var_2868_equation_0, values = (var_2802_cast_fp16_0, var_2851_cast_fp16))[name = tensor<string, []>("op_2868_cast_fp16")];
+            tensor<string, []> var_2870_equation_0 = const()[name = tensor<string, []>("op_2870_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2870_cast_fp16 = einsum(equation = var_2870_equation_0, values = (var_2802_cast_fp16_1, var_2852_cast_fp16))[name = tensor<string, []>("op_2870_cast_fp16")];
+            tensor<string, []> var_2872_equation_0 = const()[name = tensor<string, []>("op_2872_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2872_cast_fp16 = einsum(equation = var_2872_equation_0, values = (var_2802_cast_fp16_2, var_2853_cast_fp16))[name = tensor<string, []>("op_2872_cast_fp16")];
+            tensor<string, []> var_2874_equation_0 = const()[name = tensor<string, []>("op_2874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2874_cast_fp16 = einsum(equation = var_2874_equation_0, values = (var_2802_cast_fp16_3, var_2854_cast_fp16))[name = tensor<string, []>("op_2874_cast_fp16")];
+            tensor<string, []> var_2876_equation_0 = const()[name = tensor<string, []>("op_2876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2876_cast_fp16 = einsum(equation = var_2876_equation_0, values = (var_2802_cast_fp16_4, var_2855_cast_fp16))[name = tensor<string, []>("op_2876_cast_fp16")];
+            tensor<string, []> var_2878_equation_0 = const()[name = tensor<string, []>("op_2878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2878_cast_fp16 = einsum(equation = var_2878_equation_0, values = (var_2802_cast_fp16_5, var_2856_cast_fp16))[name = tensor<string, []>("op_2878_cast_fp16")];
+            tensor<string, []> var_2880_equation_0 = const()[name = tensor<string, []>("op_2880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2880_cast_fp16 = einsum(equation = var_2880_equation_0, values = (var_2802_cast_fp16_6, var_2857_cast_fp16))[name = tensor<string, []>("op_2880_cast_fp16")];
+            tensor<string, []> var_2882_equation_0 = const()[name = tensor<string, []>("op_2882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2882_cast_fp16 = einsum(equation = var_2882_equation_0, values = (var_2802_cast_fp16_7, var_2858_cast_fp16))[name = tensor<string, []>("op_2882_cast_fp16")];
+            tensor<string, []> var_2884_equation_0 = const()[name = tensor<string, []>("op_2884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2884_cast_fp16 = einsum(equation = var_2884_equation_0, values = (var_2802_cast_fp16_8, var_2859_cast_fp16))[name = tensor<string, []>("op_2884_cast_fp16")];
+            tensor<string, []> var_2886_equation_0 = const()[name = tensor<string, []>("op_2886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2886_cast_fp16 = einsum(equation = var_2886_equation_0, values = (var_2802_cast_fp16_9, var_2860_cast_fp16))[name = tensor<string, []>("op_2886_cast_fp16")];
+            tensor<string, []> var_2888_equation_0 = const()[name = tensor<string, []>("op_2888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2888_cast_fp16 = einsum(equation = var_2888_equation_0, values = (var_2802_cast_fp16_10, var_2861_cast_fp16))[name = tensor<string, []>("op_2888_cast_fp16")];
+            tensor<string, []> var_2890_equation_0 = const()[name = tensor<string, []>("op_2890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2890_cast_fp16 = einsum(equation = var_2890_equation_0, values = (var_2802_cast_fp16_11, var_2862_cast_fp16))[name = tensor<string, []>("op_2890_cast_fp16")];
+            tensor<string, []> var_2892_equation_0 = const()[name = tensor<string, []>("op_2892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2892_cast_fp16 = einsum(equation = var_2892_equation_0, values = (var_2802_cast_fp16_12, var_2863_cast_fp16))[name = tensor<string, []>("op_2892_cast_fp16")];
+            tensor<string, []> var_2894_equation_0 = const()[name = tensor<string, []>("op_2894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2894_cast_fp16 = einsum(equation = var_2894_equation_0, values = (var_2802_cast_fp16_13, var_2864_cast_fp16))[name = tensor<string, []>("op_2894_cast_fp16")];
+            tensor<string, []> var_2896_equation_0 = const()[name = tensor<string, []>("op_2896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2896_cast_fp16 = einsum(equation = var_2896_equation_0, values = (var_2802_cast_fp16_14, var_2865_cast_fp16))[name = tensor<string, []>("op_2896_cast_fp16")];
+            tensor<string, []> var_2898_equation_0 = const()[name = tensor<string, []>("op_2898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2898_cast_fp16 = einsum(equation = var_2898_equation_0, values = (var_2802_cast_fp16_15, var_2866_cast_fp16))[name = tensor<string, []>("op_2898_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_115_cast_fp16 = concat(axis = var_2715, interleave = input_115_interleave_0, values = (var_2868_cast_fp16, var_2870_cast_fp16, var_2872_cast_fp16, var_2874_cast_fp16, var_2876_cast_fp16, var_2878_cast_fp16, var_2880_cast_fp16, var_2882_cast_fp16, var_2884_cast_fp16, var_2886_cast_fp16, var_2888_cast_fp16, var_2890_cast_fp16, var_2892_cast_fp16, var_2894_cast_fp16, var_2896_cast_fp16, var_2898_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_2907_pad_type_0 = const()[name = tensor<string, []>("op_2907_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2907_strides_0 = const()[name = tensor<string, []>("op_2907_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2907_pad_0 = const()[name = tensor<string, []>("op_2907_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2907_dilations_0 = const()[name = tensor<string, []>("op_2907_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2907_groups_0 = const()[name = tensor<string, []>("op_2907_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293264512)))];
+            tensor<fp16, [1024]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295361728)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2907_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_2907_dilations_0, groups = var_2907_groups_0, pad = var_2907_pad_0, pad_type = var_2907_pad_type_0, strides = var_2907_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_2907_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_2907_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295363840)))];
+            tensor<fp16, [1024]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295365952)))];
+            tensor<fp16, []> var_2917_to_fp16 = const()[name = tensor<string, []>("op_2917_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_2917_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295368064)))];
+            tensor<fp16, [4096]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303756736)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<string, []> var_2943_pad_type_0 = const()[name = tensor<string, []>("op_2943_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2943_strides_0 = const()[name = tensor<string, []>("op_2943_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2943_pad_0 = const()[name = tensor<string, []>("op_2943_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2943_dilations_0 = const()[name = tensor<string, []>("op_2943_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2943_groups_0 = const()[name = tensor<string, []>("op_2943_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303764992)))];
+            tensor<fp16, [1024]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312153664)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2943_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_2943_dilations_0, groups = var_2943_groups_0, pad = var_2943_pad_0, pad_type = var_2943_pad_type_0, strides = var_2943_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("op_2943_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_2943_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_2952 = const()[name = tensor<string, []>("op_2952"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_123_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312155776)))];
+            tensor<fp16, [1024]> input_123_beta_0_to_fp16 = const()[name = tensor<string, []>("input_123_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312157888)))];
+            tensor<fp16, []> var_2968_to_fp16 = const()[name = tensor<string, []>("op_2968_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = input_123_beta_0_to_fp16, epsilon = var_2968_to_fp16, gamma = input_123_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
+            tensor<string, []> q_25_pad_type_0 = const()[name = tensor<string, []>("q_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_25_strides_0 = const()[name = tensor<string, []>("q_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_25_pad_0 = const()[name = tensor<string, []>("q_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_25_dilations_0 = const()[name = tensor<string, []>("q_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_25_groups_0 = const()[name = tensor<string, []>("q_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3003_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3003_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312160000)))];
+            tensor<fp16, [1024]> var_3003_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3003_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(314257216)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3003_cast_fp16 = conv(bias = var_3003_bias_0_to_fp16, dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = var_3003_weight_0_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3003_cast_fp16")];
+            tensor<string, []> k_25_pad_type_0 = const()[name = tensor<string, []>("k_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_25_strides_0 = const()[name = tensor<string, []>("k_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_25_pad_0 = const()[name = tensor<string, []>("k_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_25_dilations_0 = const()[name = tensor<string, []>("k_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_25_groups_0 = const()[name = tensor<string, []>("k_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(314259328)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_25_cast_fp16 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = blocks_12_attn_key_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
+            tensor<string, []> var_3001_pad_type_0 = const()[name = tensor<string, []>("op_3001_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3001_strides_0 = const()[name = tensor<string, []>("op_3001_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3001_pad_0 = const()[name = tensor<string, []>("op_3001_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3001_dilations_0 = const()[name = tensor<string, []>("op_3001_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3001_groups_0 = const()[name = tensor<string, []>("op_3001_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316356544)))];
+            tensor<fp16, [1024]> blocks_12_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(318453760)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3001_cast_fp16 = conv(bias = blocks_12_attn_value_bias_to_fp16, dilations = var_3001_dilations_0, groups = var_3001_groups_0, pad = var_3001_pad_0, pad_type = var_3001_pad_type_0, strides = var_3001_strides_0, weight = blocks_12_attn_value_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3001_cast_fp16")];
+            tensor<int32, [16]> tile_36 = const()[name = tensor<string, []>("tile_36"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3004_axis_0 = const()[name = tensor<string, []>("op_3004_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_15 = split(axis = var_3004_axis_0, split_sizes = tile_36, x = var_3003_cast_fp16)[name = tensor<string, []>("op_3004_cast_fp16")];
+            tensor<int32, [4]> var_3021_perm_0 = const()[name = tensor<string, []>("op_3021_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3022_axis_0 = const()[name = tensor<string, []>("op_3022_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3021_cast_fp16 = transpose(perm = var_3021_perm_0, x = k_25_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_15 = split(axis = var_3022_axis_0, split_sizes = tile_37, x = var_3021_cast_fp16)[name = tensor<string, []>("op_3022_cast_fp16")];
+            tensor<int32, [16]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3039_axis_0 = const()[name = tensor<string, []>("op_3039_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_15 = split(axis = var_3039_axis_0, split_sizes = tile_38, x = var_3001_cast_fp16)[name = tensor<string, []>("op_3039_cast_fp16")];
+            tensor<string, []> aw_385_equation_0 = const()[name = tensor<string, []>("aw_385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_385_cast_fp16 = einsum(equation = aw_385_equation_0, values = (var_3022_cast_fp16_0, var_3004_cast_fp16_0))[name = tensor<string, []>("aw_385_cast_fp16")];
+            tensor<string, []> aw_387_equation_0 = const()[name = tensor<string, []>("aw_387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_387_cast_fp16 = einsum(equation = aw_387_equation_0, values = (var_3022_cast_fp16_1, var_3004_cast_fp16_1))[name = tensor<string, []>("aw_387_cast_fp16")];
+            tensor<string, []> aw_389_equation_0 = const()[name = tensor<string, []>("aw_389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_389_cast_fp16 = einsum(equation = aw_389_equation_0, values = (var_3022_cast_fp16_2, var_3004_cast_fp16_2))[name = tensor<string, []>("aw_389_cast_fp16")];
+            tensor<string, []> aw_391_equation_0 = const()[name = tensor<string, []>("aw_391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_391_cast_fp16 = einsum(equation = aw_391_equation_0, values = (var_3022_cast_fp16_3, var_3004_cast_fp16_3))[name = tensor<string, []>("aw_391_cast_fp16")];
+            tensor<string, []> aw_393_equation_0 = const()[name = tensor<string, []>("aw_393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_393_cast_fp16 = einsum(equation = aw_393_equation_0, values = (var_3022_cast_fp16_4, var_3004_cast_fp16_4))[name = tensor<string, []>("aw_393_cast_fp16")];
+            tensor<string, []> aw_395_equation_0 = const()[name = tensor<string, []>("aw_395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_395_cast_fp16 = einsum(equation = aw_395_equation_0, values = (var_3022_cast_fp16_5, var_3004_cast_fp16_5))[name = tensor<string, []>("aw_395_cast_fp16")];
+            tensor<string, []> aw_397_equation_0 = const()[name = tensor<string, []>("aw_397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_397_cast_fp16 = einsum(equation = aw_397_equation_0, values = (var_3022_cast_fp16_6, var_3004_cast_fp16_6))[name = tensor<string, []>("aw_397_cast_fp16")];
+            tensor<string, []> aw_399_equation_0 = const()[name = tensor<string, []>("aw_399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_399_cast_fp16 = einsum(equation = aw_399_equation_0, values = (var_3022_cast_fp16_7, var_3004_cast_fp16_7))[name = tensor<string, []>("aw_399_cast_fp16")];
+            tensor<string, []> aw_401_equation_0 = const()[name = tensor<string, []>("aw_401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_401_cast_fp16 = einsum(equation = aw_401_equation_0, values = (var_3022_cast_fp16_8, var_3004_cast_fp16_8))[name = tensor<string, []>("aw_401_cast_fp16")];
+            tensor<string, []> aw_403_equation_0 = const()[name = tensor<string, []>("aw_403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_403_cast_fp16 = einsum(equation = aw_403_equation_0, values = (var_3022_cast_fp16_9, var_3004_cast_fp16_9))[name = tensor<string, []>("aw_403_cast_fp16")];
+            tensor<string, []> aw_405_equation_0 = const()[name = tensor<string, []>("aw_405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_405_cast_fp16 = einsum(equation = aw_405_equation_0, values = (var_3022_cast_fp16_10, var_3004_cast_fp16_10))[name = tensor<string, []>("aw_405_cast_fp16")];
+            tensor<string, []> aw_407_equation_0 = const()[name = tensor<string, []>("aw_407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_407_cast_fp16 = einsum(equation = aw_407_equation_0, values = (var_3022_cast_fp16_11, var_3004_cast_fp16_11))[name = tensor<string, []>("aw_407_cast_fp16")];
+            tensor<string, []> aw_409_equation_0 = const()[name = tensor<string, []>("aw_409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_409_cast_fp16 = einsum(equation = aw_409_equation_0, values = (var_3022_cast_fp16_12, var_3004_cast_fp16_12))[name = tensor<string, []>("aw_409_cast_fp16")];
+            tensor<string, []> aw_411_equation_0 = const()[name = tensor<string, []>("aw_411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_411_cast_fp16 = einsum(equation = aw_411_equation_0, values = (var_3022_cast_fp16_13, var_3004_cast_fp16_13))[name = tensor<string, []>("aw_411_cast_fp16")];
+            tensor<string, []> aw_413_equation_0 = const()[name = tensor<string, []>("aw_413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_413_cast_fp16 = einsum(equation = aw_413_equation_0, values = (var_3022_cast_fp16_14, var_3004_cast_fp16_14))[name = tensor<string, []>("aw_413_cast_fp16")];
+            tensor<string, []> aw_415_equation_0 = const()[name = tensor<string, []>("aw_415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_415_cast_fp16 = einsum(equation = aw_415_equation_0, values = (var_3022_cast_fp16_15, var_3004_cast_fp16_15))[name = tensor<string, []>("aw_415_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3088_cast_fp16 = softmax(axis = var_2952, x = aw_385_cast_fp16)[name = tensor<string, []>("op_3088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3089_cast_fp16 = softmax(axis = var_2952, x = aw_387_cast_fp16)[name = tensor<string, []>("op_3089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3090_cast_fp16 = softmax(axis = var_2952, x = aw_389_cast_fp16)[name = tensor<string, []>("op_3090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3091_cast_fp16 = softmax(axis = var_2952, x = aw_391_cast_fp16)[name = tensor<string, []>("op_3091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3092_cast_fp16 = softmax(axis = var_2952, x = aw_393_cast_fp16)[name = tensor<string, []>("op_3092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3093_cast_fp16 = softmax(axis = var_2952, x = aw_395_cast_fp16)[name = tensor<string, []>("op_3093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3094_cast_fp16 = softmax(axis = var_2952, x = aw_397_cast_fp16)[name = tensor<string, []>("op_3094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3095_cast_fp16 = softmax(axis = var_2952, x = aw_399_cast_fp16)[name = tensor<string, []>("op_3095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3096_cast_fp16 = softmax(axis = var_2952, x = aw_401_cast_fp16)[name = tensor<string, []>("op_3096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3097_cast_fp16 = softmax(axis = var_2952, x = aw_403_cast_fp16)[name = tensor<string, []>("op_3097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3098_cast_fp16 = softmax(axis = var_2952, x = aw_405_cast_fp16)[name = tensor<string, []>("op_3098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3099_cast_fp16 = softmax(axis = var_2952, x = aw_407_cast_fp16)[name = tensor<string, []>("op_3099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3100_cast_fp16 = softmax(axis = var_2952, x = aw_409_cast_fp16)[name = tensor<string, []>("op_3100_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3101_cast_fp16 = softmax(axis = var_2952, x = aw_411_cast_fp16)[name = tensor<string, []>("op_3101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3102_cast_fp16 = softmax(axis = var_2952, x = aw_413_cast_fp16)[name = tensor<string, []>("op_3102_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3103_cast_fp16 = softmax(axis = var_2952, x = aw_415_cast_fp16)[name = tensor<string, []>("op_3103_cast_fp16")];
+            tensor<string, []> var_3105_equation_0 = const()[name = tensor<string, []>("op_3105_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3105_cast_fp16 = einsum(equation = var_3105_equation_0, values = (var_3039_cast_fp16_0, var_3088_cast_fp16))[name = tensor<string, []>("op_3105_cast_fp16")];
+            tensor<string, []> var_3107_equation_0 = const()[name = tensor<string, []>("op_3107_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3107_cast_fp16 = einsum(equation = var_3107_equation_0, values = (var_3039_cast_fp16_1, var_3089_cast_fp16))[name = tensor<string, []>("op_3107_cast_fp16")];
+            tensor<string, []> var_3109_equation_0 = const()[name = tensor<string, []>("op_3109_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3109_cast_fp16 = einsum(equation = var_3109_equation_0, values = (var_3039_cast_fp16_2, var_3090_cast_fp16))[name = tensor<string, []>("op_3109_cast_fp16")];
+            tensor<string, []> var_3111_equation_0 = const()[name = tensor<string, []>("op_3111_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3111_cast_fp16 = einsum(equation = var_3111_equation_0, values = (var_3039_cast_fp16_3, var_3091_cast_fp16))[name = tensor<string, []>("op_3111_cast_fp16")];
+            tensor<string, []> var_3113_equation_0 = const()[name = tensor<string, []>("op_3113_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3113_cast_fp16 = einsum(equation = var_3113_equation_0, values = (var_3039_cast_fp16_4, var_3092_cast_fp16))[name = tensor<string, []>("op_3113_cast_fp16")];
+            tensor<string, []> var_3115_equation_0 = const()[name = tensor<string, []>("op_3115_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3115_cast_fp16 = einsum(equation = var_3115_equation_0, values = (var_3039_cast_fp16_5, var_3093_cast_fp16))[name = tensor<string, []>("op_3115_cast_fp16")];
+            tensor<string, []> var_3117_equation_0 = const()[name = tensor<string, []>("op_3117_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3117_cast_fp16 = einsum(equation = var_3117_equation_0, values = (var_3039_cast_fp16_6, var_3094_cast_fp16))[name = tensor<string, []>("op_3117_cast_fp16")];
+            tensor<string, []> var_3119_equation_0 = const()[name = tensor<string, []>("op_3119_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3119_cast_fp16 = einsum(equation = var_3119_equation_0, values = (var_3039_cast_fp16_7, var_3095_cast_fp16))[name = tensor<string, []>("op_3119_cast_fp16")];
+            tensor<string, []> var_3121_equation_0 = const()[name = tensor<string, []>("op_3121_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3121_cast_fp16 = einsum(equation = var_3121_equation_0, values = (var_3039_cast_fp16_8, var_3096_cast_fp16))[name = tensor<string, []>("op_3121_cast_fp16")];
+            tensor<string, []> var_3123_equation_0 = const()[name = tensor<string, []>("op_3123_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3123_cast_fp16 = einsum(equation = var_3123_equation_0, values = (var_3039_cast_fp16_9, var_3097_cast_fp16))[name = tensor<string, []>("op_3123_cast_fp16")];
+            tensor<string, []> var_3125_equation_0 = const()[name = tensor<string, []>("op_3125_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3125_cast_fp16 = einsum(equation = var_3125_equation_0, values = (var_3039_cast_fp16_10, var_3098_cast_fp16))[name = tensor<string, []>("op_3125_cast_fp16")];
+            tensor<string, []> var_3127_equation_0 = const()[name = tensor<string, []>("op_3127_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3127_cast_fp16 = einsum(equation = var_3127_equation_0, values = (var_3039_cast_fp16_11, var_3099_cast_fp16))[name = tensor<string, []>("op_3127_cast_fp16")];
+            tensor<string, []> var_3129_equation_0 = const()[name = tensor<string, []>("op_3129_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3129_cast_fp16 = einsum(equation = var_3129_equation_0, values = (var_3039_cast_fp16_12, var_3100_cast_fp16))[name = tensor<string, []>("op_3129_cast_fp16")];
+            tensor<string, []> var_3131_equation_0 = const()[name = tensor<string, []>("op_3131_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3131_cast_fp16 = einsum(equation = var_3131_equation_0, values = (var_3039_cast_fp16_13, var_3101_cast_fp16))[name = tensor<string, []>("op_3131_cast_fp16")];
+            tensor<string, []> var_3133_equation_0 = const()[name = tensor<string, []>("op_3133_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3133_cast_fp16 = einsum(equation = var_3133_equation_0, values = (var_3039_cast_fp16_14, var_3102_cast_fp16))[name = tensor<string, []>("op_3133_cast_fp16")];
+            tensor<string, []> var_3135_equation_0 = const()[name = tensor<string, []>("op_3135_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16 = einsum(equation = var_3135_equation_0, values = (var_3039_cast_fp16_15, var_3103_cast_fp16))[name = tensor<string, []>("op_3135_cast_fp16")];
+            tensor<bool, []> input_125_interleave_0 = const()[name = tensor<string, []>("input_125_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_125_cast_fp16 = concat(axis = var_2952, interleave = input_125_interleave_0, values = (var_3105_cast_fp16, var_3107_cast_fp16, var_3109_cast_fp16, var_3111_cast_fp16, var_3113_cast_fp16, var_3115_cast_fp16, var_3117_cast_fp16, var_3119_cast_fp16, var_3121_cast_fp16, var_3123_cast_fp16, var_3125_cast_fp16, var_3127_cast_fp16, var_3129_cast_fp16, var_3131_cast_fp16, var_3133_cast_fp16, var_3135_cast_fp16))[name = tensor<string, []>("input_125_cast_fp16")];
+            tensor<string, []> var_3144_pad_type_0 = const()[name = tensor<string, []>("op_3144_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3144_strides_0 = const()[name = tensor<string, []>("op_3144_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3144_pad_0 = const()[name = tensor<string, []>("op_3144_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3144_dilations_0 = const()[name = tensor<string, []>("op_3144_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3144_groups_0 = const()[name = tensor<string, []>("op_3144_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(318455872)))];
+            tensor<fp16, [1024]> blocks_12_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320553088)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3144_cast_fp16 = conv(bias = blocks_12_attn_out_bias_to_fp16, dilations = var_3144_dilations_0, groups = var_3144_groups_0, pad = var_3144_pad_0, pad_type = var_3144_pad_type_0, strides = var_3144_strides_0, weight = blocks_12_attn_out_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("op_3144_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = var_3144_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_127_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320555200)))];
+            tensor<fp16, [1024]> input_127_beta_0_to_fp16 = const()[name = tensor<string, []>("input_127_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320557312)))];
+            tensor<fp16, []> var_3154_to_fp16 = const()[name = tensor<string, []>("op_3154_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = input_127_beta_0_to_fp16, epsilon = var_3154_to_fp16, gamma = input_127_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
+            tensor<string, []> input_129_pad_type_0 = const()[name = tensor<string, []>("input_129_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_129_strides_0 = const()[name = tensor<string, []>("input_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_129_pad_0 = const()[name = tensor<string, []>("input_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_129_dilations_0 = const()[name = tensor<string, []>("input_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_129_groups_0 = const()[name = tensor<string, []>("input_129_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_12_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320559424)))];
+            tensor<fp16, [4096]> blocks_12_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(328948096)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_129_cast_fp16 = conv(bias = blocks_12_mlp_0_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = blocks_12_mlp_0_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
+            tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<string, []> var_3180_pad_type_0 = const()[name = tensor<string, []>("op_3180_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3180_strides_0 = const()[name = tensor<string, []>("op_3180_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3180_pad_0 = const()[name = tensor<string, []>("op_3180_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3180_dilations_0 = const()[name = tensor<string, []>("op_3180_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3180_groups_0 = const()[name = tensor<string, []>("op_3180_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_12_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(328956352)))];
+            tensor<fp16, [1024]> blocks_12_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337345024)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3180_cast_fp16 = conv(bias = blocks_12_mlp_2_bias_to_fp16, dilations = var_3180_dilations_0, groups = var_3180_groups_0, pad = var_3180_pad_0, pad_type = var_3180_pad_type_0, strides = var_3180_strides_0, weight = blocks_12_mlp_2_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("op_3180_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = var_3180_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, []> var_3189 = const()[name = tensor<string, []>("op_3189"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = tensor<string, []>("input_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_133_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_133_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337347136)))];
+            tensor<fp16, [1024]> input_133_beta_0_to_fp16 = const()[name = tensor<string, []>("input_133_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337349248)))];
+            tensor<fp16, []> var_3205_to_fp16 = const()[name = tensor<string, []>("op_3205_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = input_133_beta_0_to_fp16, epsilon = var_3205_to_fp16, gamma = input_133_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<string, []> q_27_pad_type_0 = const()[name = tensor<string, []>("q_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_27_strides_0 = const()[name = tensor<string, []>("q_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_27_pad_0 = const()[name = tensor<string, []>("q_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_27_dilations_0 = const()[name = tensor<string, []>("q_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_27_groups_0 = const()[name = tensor<string, []>("q_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3240_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3240_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337351360)))];
+            tensor<fp16, [1024]> var_3240_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3240_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339448576)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3240_cast_fp16 = conv(bias = var_3240_bias_0_to_fp16, dilations = q_27_dilations_0, groups = q_27_groups_0, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = q_27_strides_0, weight = var_3240_weight_0_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3240_cast_fp16")];
+            tensor<string, []> k_27_pad_type_0 = const()[name = tensor<string, []>("k_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_27_strides_0 = const()[name = tensor<string, []>("k_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_27_pad_0 = const()[name = tensor<string, []>("k_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_27_dilations_0 = const()[name = tensor<string, []>("k_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_27_groups_0 = const()[name = tensor<string, []>("k_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339450688)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_27_cast_fp16 = conv(dilations = k_27_dilations_0, groups = k_27_groups_0, pad = k_27_pad_0, pad_type = k_27_pad_type_0, strides = k_27_strides_0, weight = blocks_13_attn_key_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("k_27_cast_fp16")];
+            tensor<string, []> var_3238_pad_type_0 = const()[name = tensor<string, []>("op_3238_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3238_strides_0 = const()[name = tensor<string, []>("op_3238_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3238_pad_0 = const()[name = tensor<string, []>("op_3238_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3238_dilations_0 = const()[name = tensor<string, []>("op_3238_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3238_groups_0 = const()[name = tensor<string, []>("op_3238_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(341547904)))];
+            tensor<fp16, [1024]> blocks_13_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(343645120)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3238_cast_fp16 = conv(bias = blocks_13_attn_value_bias_to_fp16, dilations = var_3238_dilations_0, groups = var_3238_groups_0, pad = var_3238_pad_0, pad_type = var_3238_pad_type_0, strides = var_3238_strides_0, weight = blocks_13_attn_value_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3238_cast_fp16")];
+            tensor<int32, [16]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3241_axis_0 = const()[name = tensor<string, []>("op_3241_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_15 = split(axis = var_3241_axis_0, split_sizes = tile_39, x = var_3240_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<int32, [4]> var_3258_perm_0 = const()[name = tensor<string, []>("op_3258_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_40 = const()[name = tensor<string, []>("tile_40"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3259_axis_0 = const()[name = tensor<string, []>("op_3259_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3258_cast_fp16 = transpose(perm = var_3258_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_15 = split(axis = var_3259_axis_0, split_sizes = tile_40, x = var_3258_cast_fp16)[name = tensor<string, []>("op_3259_cast_fp16")];
+            tensor<int32, [16]> tile_41 = const()[name = tensor<string, []>("tile_41"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3276_axis_0 = const()[name = tensor<string, []>("op_3276_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_15 = split(axis = var_3276_axis_0, split_sizes = tile_41, x = var_3238_cast_fp16)[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<string, []> aw_417_equation_0 = const()[name = tensor<string, []>("aw_417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_417_cast_fp16 = einsum(equation = aw_417_equation_0, values = (var_3259_cast_fp16_0, var_3241_cast_fp16_0))[name = tensor<string, []>("aw_417_cast_fp16")];
+            tensor<string, []> aw_419_equation_0 = const()[name = tensor<string, []>("aw_419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_419_cast_fp16 = einsum(equation = aw_419_equation_0, values = (var_3259_cast_fp16_1, var_3241_cast_fp16_1))[name = tensor<string, []>("aw_419_cast_fp16")];
+            tensor<string, []> aw_421_equation_0 = const()[name = tensor<string, []>("aw_421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_421_cast_fp16 = einsum(equation = aw_421_equation_0, values = (var_3259_cast_fp16_2, var_3241_cast_fp16_2))[name = tensor<string, []>("aw_421_cast_fp16")];
+            tensor<string, []> aw_423_equation_0 = const()[name = tensor<string, []>("aw_423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_423_cast_fp16 = einsum(equation = aw_423_equation_0, values = (var_3259_cast_fp16_3, var_3241_cast_fp16_3))[name = tensor<string, []>("aw_423_cast_fp16")];
+            tensor<string, []> aw_425_equation_0 = const()[name = tensor<string, []>("aw_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_425_cast_fp16 = einsum(equation = aw_425_equation_0, values = (var_3259_cast_fp16_4, var_3241_cast_fp16_4))[name = tensor<string, []>("aw_425_cast_fp16")];
+            tensor<string, []> aw_427_equation_0 = const()[name = tensor<string, []>("aw_427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_427_cast_fp16 = einsum(equation = aw_427_equation_0, values = (var_3259_cast_fp16_5, var_3241_cast_fp16_5))[name = tensor<string, []>("aw_427_cast_fp16")];
+            tensor<string, []> aw_429_equation_0 = const()[name = tensor<string, []>("aw_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_429_cast_fp16 = einsum(equation = aw_429_equation_0, values = (var_3259_cast_fp16_6, var_3241_cast_fp16_6))[name = tensor<string, []>("aw_429_cast_fp16")];
+            tensor<string, []> aw_431_equation_0 = const()[name = tensor<string, []>("aw_431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_431_cast_fp16 = einsum(equation = aw_431_equation_0, values = (var_3259_cast_fp16_7, var_3241_cast_fp16_7))[name = tensor<string, []>("aw_431_cast_fp16")];
+            tensor<string, []> aw_433_equation_0 = const()[name = tensor<string, []>("aw_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_433_cast_fp16 = einsum(equation = aw_433_equation_0, values = (var_3259_cast_fp16_8, var_3241_cast_fp16_8))[name = tensor<string, []>("aw_433_cast_fp16")];
+            tensor<string, []> aw_435_equation_0 = const()[name = tensor<string, []>("aw_435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_435_cast_fp16 = einsum(equation = aw_435_equation_0, values = (var_3259_cast_fp16_9, var_3241_cast_fp16_9))[name = tensor<string, []>("aw_435_cast_fp16")];
+            tensor<string, []> aw_437_equation_0 = const()[name = tensor<string, []>("aw_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_437_cast_fp16 = einsum(equation = aw_437_equation_0, values = (var_3259_cast_fp16_10, var_3241_cast_fp16_10))[name = tensor<string, []>("aw_437_cast_fp16")];
+            tensor<string, []> aw_439_equation_0 = const()[name = tensor<string, []>("aw_439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_439_cast_fp16 = einsum(equation = aw_439_equation_0, values = (var_3259_cast_fp16_11, var_3241_cast_fp16_11))[name = tensor<string, []>("aw_439_cast_fp16")];
+            tensor<string, []> aw_441_equation_0 = const()[name = tensor<string, []>("aw_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_441_cast_fp16 = einsum(equation = aw_441_equation_0, values = (var_3259_cast_fp16_12, var_3241_cast_fp16_12))[name = tensor<string, []>("aw_441_cast_fp16")];
+            tensor<string, []> aw_443_equation_0 = const()[name = tensor<string, []>("aw_443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_443_cast_fp16 = einsum(equation = aw_443_equation_0, values = (var_3259_cast_fp16_13, var_3241_cast_fp16_13))[name = tensor<string, []>("aw_443_cast_fp16")];
+            tensor<string, []> aw_445_equation_0 = const()[name = tensor<string, []>("aw_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_445_cast_fp16 = einsum(equation = aw_445_equation_0, values = (var_3259_cast_fp16_14, var_3241_cast_fp16_14))[name = tensor<string, []>("aw_445_cast_fp16")];
+            tensor<string, []> aw_447_equation_0 = const()[name = tensor<string, []>("aw_447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_447_cast_fp16 = einsum(equation = aw_447_equation_0, values = (var_3259_cast_fp16_15, var_3241_cast_fp16_15))[name = tensor<string, []>("aw_447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3325_cast_fp16 = softmax(axis = var_3189, x = aw_417_cast_fp16)[name = tensor<string, []>("op_3325_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3326_cast_fp16 = softmax(axis = var_3189, x = aw_419_cast_fp16)[name = tensor<string, []>("op_3326_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3327_cast_fp16 = softmax(axis = var_3189, x = aw_421_cast_fp16)[name = tensor<string, []>("op_3327_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3328_cast_fp16 = softmax(axis = var_3189, x = aw_423_cast_fp16)[name = tensor<string, []>("op_3328_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3329_cast_fp16 = softmax(axis = var_3189, x = aw_425_cast_fp16)[name = tensor<string, []>("op_3329_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3330_cast_fp16 = softmax(axis = var_3189, x = aw_427_cast_fp16)[name = tensor<string, []>("op_3330_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3331_cast_fp16 = softmax(axis = var_3189, x = aw_429_cast_fp16)[name = tensor<string, []>("op_3331_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3332_cast_fp16 = softmax(axis = var_3189, x = aw_431_cast_fp16)[name = tensor<string, []>("op_3332_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3333_cast_fp16 = softmax(axis = var_3189, x = aw_433_cast_fp16)[name = tensor<string, []>("op_3333_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3334_cast_fp16 = softmax(axis = var_3189, x = aw_435_cast_fp16)[name = tensor<string, []>("op_3334_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3335_cast_fp16 = softmax(axis = var_3189, x = aw_437_cast_fp16)[name = tensor<string, []>("op_3335_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3336_cast_fp16 = softmax(axis = var_3189, x = aw_439_cast_fp16)[name = tensor<string, []>("op_3336_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3337_cast_fp16 = softmax(axis = var_3189, x = aw_441_cast_fp16)[name = tensor<string, []>("op_3337_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3338_cast_fp16 = softmax(axis = var_3189, x = aw_443_cast_fp16)[name = tensor<string, []>("op_3338_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3339_cast_fp16 = softmax(axis = var_3189, x = aw_445_cast_fp16)[name = tensor<string, []>("op_3339_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3340_cast_fp16 = softmax(axis = var_3189, x = aw_447_cast_fp16)[name = tensor<string, []>("op_3340_cast_fp16")];
+            tensor<string, []> var_3342_equation_0 = const()[name = tensor<string, []>("op_3342_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3342_cast_fp16 = einsum(equation = var_3342_equation_0, values = (var_3276_cast_fp16_0, var_3325_cast_fp16))[name = tensor<string, []>("op_3342_cast_fp16")];
+            tensor<string, []> var_3344_equation_0 = const()[name = tensor<string, []>("op_3344_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3344_cast_fp16 = einsum(equation = var_3344_equation_0, values = (var_3276_cast_fp16_1, var_3326_cast_fp16))[name = tensor<string, []>("op_3344_cast_fp16")];
+            tensor<string, []> var_3346_equation_0 = const()[name = tensor<string, []>("op_3346_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3346_cast_fp16 = einsum(equation = var_3346_equation_0, values = (var_3276_cast_fp16_2, var_3327_cast_fp16))[name = tensor<string, []>("op_3346_cast_fp16")];
+            tensor<string, []> var_3348_equation_0 = const()[name = tensor<string, []>("op_3348_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3348_cast_fp16 = einsum(equation = var_3348_equation_0, values = (var_3276_cast_fp16_3, var_3328_cast_fp16))[name = tensor<string, []>("op_3348_cast_fp16")];
+            tensor<string, []> var_3350_equation_0 = const()[name = tensor<string, []>("op_3350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3350_cast_fp16 = einsum(equation = var_3350_equation_0, values = (var_3276_cast_fp16_4, var_3329_cast_fp16))[name = tensor<string, []>("op_3350_cast_fp16")];
+            tensor<string, []> var_3352_equation_0 = const()[name = tensor<string, []>("op_3352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3352_cast_fp16 = einsum(equation = var_3352_equation_0, values = (var_3276_cast_fp16_5, var_3330_cast_fp16))[name = tensor<string, []>("op_3352_cast_fp16")];
+            tensor<string, []> var_3354_equation_0 = const()[name = tensor<string, []>("op_3354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3354_cast_fp16 = einsum(equation = var_3354_equation_0, values = (var_3276_cast_fp16_6, var_3331_cast_fp16))[name = tensor<string, []>("op_3354_cast_fp16")];
+            tensor<string, []> var_3356_equation_0 = const()[name = tensor<string, []>("op_3356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3356_cast_fp16 = einsum(equation = var_3356_equation_0, values = (var_3276_cast_fp16_7, var_3332_cast_fp16))[name = tensor<string, []>("op_3356_cast_fp16")];
+            tensor<string, []> var_3358_equation_0 = const()[name = tensor<string, []>("op_3358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3358_cast_fp16 = einsum(equation = var_3358_equation_0, values = (var_3276_cast_fp16_8, var_3333_cast_fp16))[name = tensor<string, []>("op_3358_cast_fp16")];
+            tensor<string, []> var_3360_equation_0 = const()[name = tensor<string, []>("op_3360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3360_cast_fp16 = einsum(equation = var_3360_equation_0, values = (var_3276_cast_fp16_9, var_3334_cast_fp16))[name = tensor<string, []>("op_3360_cast_fp16")];
+            tensor<string, []> var_3362_equation_0 = const()[name = tensor<string, []>("op_3362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3362_cast_fp16 = einsum(equation = var_3362_equation_0, values = (var_3276_cast_fp16_10, var_3335_cast_fp16))[name = tensor<string, []>("op_3362_cast_fp16")];
+            tensor<string, []> var_3364_equation_0 = const()[name = tensor<string, []>("op_3364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3364_cast_fp16 = einsum(equation = var_3364_equation_0, values = (var_3276_cast_fp16_11, var_3336_cast_fp16))[name = tensor<string, []>("op_3364_cast_fp16")];
+            tensor<string, []> var_3366_equation_0 = const()[name = tensor<string, []>("op_3366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3366_cast_fp16 = einsum(equation = var_3366_equation_0, values = (var_3276_cast_fp16_12, var_3337_cast_fp16))[name = tensor<string, []>("op_3366_cast_fp16")];
+            tensor<string, []> var_3368_equation_0 = const()[name = tensor<string, []>("op_3368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3368_cast_fp16 = einsum(equation = var_3368_equation_0, values = (var_3276_cast_fp16_13, var_3338_cast_fp16))[name = tensor<string, []>("op_3368_cast_fp16")];
+            tensor<string, []> var_3370_equation_0 = const()[name = tensor<string, []>("op_3370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3370_cast_fp16 = einsum(equation = var_3370_equation_0, values = (var_3276_cast_fp16_14, var_3339_cast_fp16))[name = tensor<string, []>("op_3370_cast_fp16")];
+            tensor<string, []> var_3372_equation_0 = const()[name = tensor<string, []>("op_3372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3372_cast_fp16 = einsum(equation = var_3372_equation_0, values = (var_3276_cast_fp16_15, var_3340_cast_fp16))[name = tensor<string, []>("op_3372_cast_fp16")];
+            tensor<bool, []> input_135_interleave_0 = const()[name = tensor<string, []>("input_135_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_135_cast_fp16 = concat(axis = var_3189, interleave = input_135_interleave_0, values = (var_3342_cast_fp16, var_3344_cast_fp16, var_3346_cast_fp16, var_3348_cast_fp16, var_3350_cast_fp16, var_3352_cast_fp16, var_3354_cast_fp16, var_3356_cast_fp16, var_3358_cast_fp16, var_3360_cast_fp16, var_3362_cast_fp16, var_3364_cast_fp16, var_3366_cast_fp16, var_3368_cast_fp16, var_3370_cast_fp16, var_3372_cast_fp16))[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<string, []> var_3381_pad_type_0 = const()[name = tensor<string, []>("op_3381_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3381_strides_0 = const()[name = tensor<string, []>("op_3381_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3381_pad_0 = const()[name = tensor<string, []>("op_3381_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3381_dilations_0 = const()[name = tensor<string, []>("op_3381_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3381_groups_0 = const()[name = tensor<string, []>("op_3381_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(343647232)))];
+            tensor<fp16, [1024]> blocks_13_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345744448)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3381_cast_fp16 = conv(bias = blocks_13_attn_out_bias_to_fp16, dilations = var_3381_dilations_0, groups = var_3381_groups_0, pad = var_3381_pad_0, pad_type = var_3381_pad_type_0, strides = var_3381_strides_0, weight = blocks_13_attn_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("op_3381_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = var_3381_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_137_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_137_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345746560)))];
+            tensor<fp16, [1024]> input_137_beta_0_to_fp16 = const()[name = tensor<string, []>("input_137_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345748672)))];
+            tensor<fp16, []> var_3391_to_fp16 = const()[name = tensor<string, []>("op_3391_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_3391_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_13_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345750784)))];
+            tensor<fp16, [4096]> blocks_13_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(354139456)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_139_cast_fp16 = conv(bias = blocks_13_mlp_0_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = blocks_13_mlp_0_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<string, []> var_3417_pad_type_0 = const()[name = tensor<string, []>("op_3417_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3417_strides_0 = const()[name = tensor<string, []>("op_3417_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3417_pad_0 = const()[name = tensor<string, []>("op_3417_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3417_dilations_0 = const()[name = tensor<string, []>("op_3417_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3417_groups_0 = const()[name = tensor<string, []>("op_3417_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_13_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(354147712)))];
+            tensor<fp16, [1024]> blocks_13_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362536384)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3417_cast_fp16 = conv(bias = blocks_13_mlp_2_bias_to_fp16, dilations = var_3417_dilations_0, groups = var_3417_groups_0, pad = var_3417_pad_0, pad_type = var_3417_pad_type_0, strides = var_3417_strides_0, weight = blocks_13_mlp_2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("op_3417_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_3417_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, []> var_3426 = const()[name = tensor<string, []>("op_3426"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_143_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_143_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362538496)))];
+            tensor<fp16, [1024]> input_143_beta_0_to_fp16 = const()[name = tensor<string, []>("input_143_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362540608)))];
+            tensor<fp16, []> var_3442_to_fp16 = const()[name = tensor<string, []>("op_3442_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = input_143_beta_0_to_fp16, epsilon = var_3442_to_fp16, gamma = input_143_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<string, []> q_29_pad_type_0 = const()[name = tensor<string, []>("q_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_29_strides_0 = const()[name = tensor<string, []>("q_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_29_pad_0 = const()[name = tensor<string, []>("q_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_29_dilations_0 = const()[name = tensor<string, []>("q_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_29_groups_0 = const()[name = tensor<string, []>("q_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3477_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3477_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362542720)))];
+            tensor<fp16, [1024]> var_3477_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3477_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(364639936)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3477_cast_fp16 = conv(bias = var_3477_bias_0_to_fp16, dilations = q_29_dilations_0, groups = q_29_groups_0, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = q_29_strides_0, weight = var_3477_weight_0_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3477_cast_fp16")];
+            tensor<string, []> k_29_pad_type_0 = const()[name = tensor<string, []>("k_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_29_strides_0 = const()[name = tensor<string, []>("k_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_29_pad_0 = const()[name = tensor<string, []>("k_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_29_dilations_0 = const()[name = tensor<string, []>("k_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_29_groups_0 = const()[name = tensor<string, []>("k_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(364642048)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_29_cast_fp16 = conv(dilations = k_29_dilations_0, groups = k_29_groups_0, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = k_29_strides_0, weight = blocks_14_attn_key_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
+            tensor<string, []> var_3475_pad_type_0 = const()[name = tensor<string, []>("op_3475_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3475_strides_0 = const()[name = tensor<string, []>("op_3475_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3475_pad_0 = const()[name = tensor<string, []>("op_3475_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3475_dilations_0 = const()[name = tensor<string, []>("op_3475_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3475_groups_0 = const()[name = tensor<string, []>("op_3475_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(366739264)))];
+            tensor<fp16, [1024]> blocks_14_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368836480)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3475_cast_fp16 = conv(bias = blocks_14_attn_value_bias_to_fp16, dilations = var_3475_dilations_0, groups = var_3475_groups_0, pad = var_3475_pad_0, pad_type = var_3475_pad_type_0, strides = var_3475_strides_0, weight = blocks_14_attn_value_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3475_cast_fp16")];
+            tensor<int32, [16]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3478_axis_0 = const()[name = tensor<string, []>("op_3478_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_15 = split(axis = var_3478_axis_0, split_sizes = tile_42, x = var_3477_cast_fp16)[name = tensor<string, []>("op_3478_cast_fp16")];
+            tensor<int32, [4]> var_3495_perm_0 = const()[name = tensor<string, []>("op_3495_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3496_axis_0 = const()[name = tensor<string, []>("op_3496_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3495_cast_fp16 = transpose(perm = var_3495_perm_0, x = k_29_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_15 = split(axis = var_3496_axis_0, split_sizes = tile_43, x = var_3495_cast_fp16)[name = tensor<string, []>("op_3496_cast_fp16")];
+            tensor<int32, [16]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3513_axis_0 = const()[name = tensor<string, []>("op_3513_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_15 = split(axis = var_3513_axis_0, split_sizes = tile_44, x = var_3475_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
+            tensor<string, []> aw_449_equation_0 = const()[name = tensor<string, []>("aw_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_449_cast_fp16 = einsum(equation = aw_449_equation_0, values = (var_3496_cast_fp16_0, var_3478_cast_fp16_0))[name = tensor<string, []>("aw_449_cast_fp16")];
+            tensor<string, []> aw_451_equation_0 = const()[name = tensor<string, []>("aw_451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_451_cast_fp16 = einsum(equation = aw_451_equation_0, values = (var_3496_cast_fp16_1, var_3478_cast_fp16_1))[name = tensor<string, []>("aw_451_cast_fp16")];
+            tensor<string, []> aw_453_equation_0 = const()[name = tensor<string, []>("aw_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_453_cast_fp16 = einsum(equation = aw_453_equation_0, values = (var_3496_cast_fp16_2, var_3478_cast_fp16_2))[name = tensor<string, []>("aw_453_cast_fp16")];
+            tensor<string, []> aw_455_equation_0 = const()[name = tensor<string, []>("aw_455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_455_cast_fp16 = einsum(equation = aw_455_equation_0, values = (var_3496_cast_fp16_3, var_3478_cast_fp16_3))[name = tensor<string, []>("aw_455_cast_fp16")];
+            tensor<string, []> aw_457_equation_0 = const()[name = tensor<string, []>("aw_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_457_cast_fp16 = einsum(equation = aw_457_equation_0, values = (var_3496_cast_fp16_4, var_3478_cast_fp16_4))[name = tensor<string, []>("aw_457_cast_fp16")];
+            tensor<string, []> aw_459_equation_0 = const()[name = tensor<string, []>("aw_459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_459_cast_fp16 = einsum(equation = aw_459_equation_0, values = (var_3496_cast_fp16_5, var_3478_cast_fp16_5))[name = tensor<string, []>("aw_459_cast_fp16")];
+            tensor<string, []> aw_461_equation_0 = const()[name = tensor<string, []>("aw_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_461_cast_fp16 = einsum(equation = aw_461_equation_0, values = (var_3496_cast_fp16_6, var_3478_cast_fp16_6))[name = tensor<string, []>("aw_461_cast_fp16")];
+            tensor<string, []> aw_463_equation_0 = const()[name = tensor<string, []>("aw_463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_463_cast_fp16 = einsum(equation = aw_463_equation_0, values = (var_3496_cast_fp16_7, var_3478_cast_fp16_7))[name = tensor<string, []>("aw_463_cast_fp16")];
+            tensor<string, []> aw_465_equation_0 = const()[name = tensor<string, []>("aw_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_465_cast_fp16 = einsum(equation = aw_465_equation_0, values = (var_3496_cast_fp16_8, var_3478_cast_fp16_8))[name = tensor<string, []>("aw_465_cast_fp16")];
+            tensor<string, []> aw_467_equation_0 = const()[name = tensor<string, []>("aw_467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_467_cast_fp16 = einsum(equation = aw_467_equation_0, values = (var_3496_cast_fp16_9, var_3478_cast_fp16_9))[name = tensor<string, []>("aw_467_cast_fp16")];
+            tensor<string, []> aw_469_equation_0 = const()[name = tensor<string, []>("aw_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_469_cast_fp16 = einsum(equation = aw_469_equation_0, values = (var_3496_cast_fp16_10, var_3478_cast_fp16_10))[name = tensor<string, []>("aw_469_cast_fp16")];
+            tensor<string, []> aw_471_equation_0 = const()[name = tensor<string, []>("aw_471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_471_cast_fp16 = einsum(equation = aw_471_equation_0, values = (var_3496_cast_fp16_11, var_3478_cast_fp16_11))[name = tensor<string, []>("aw_471_cast_fp16")];
+            tensor<string, []> aw_473_equation_0 = const()[name = tensor<string, []>("aw_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_473_cast_fp16 = einsum(equation = aw_473_equation_0, values = (var_3496_cast_fp16_12, var_3478_cast_fp16_12))[name = tensor<string, []>("aw_473_cast_fp16")];
+            tensor<string, []> aw_475_equation_0 = const()[name = tensor<string, []>("aw_475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_475_cast_fp16 = einsum(equation = aw_475_equation_0, values = (var_3496_cast_fp16_13, var_3478_cast_fp16_13))[name = tensor<string, []>("aw_475_cast_fp16")];
+            tensor<string, []> aw_477_equation_0 = const()[name = tensor<string, []>("aw_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_477_cast_fp16 = einsum(equation = aw_477_equation_0, values = (var_3496_cast_fp16_14, var_3478_cast_fp16_14))[name = tensor<string, []>("aw_477_cast_fp16")];
+            tensor<string, []> aw_479_equation_0 = const()[name = tensor<string, []>("aw_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_479_cast_fp16 = einsum(equation = aw_479_equation_0, values = (var_3496_cast_fp16_15, var_3478_cast_fp16_15))[name = tensor<string, []>("aw_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3562_cast_fp16 = softmax(axis = var_3426, x = aw_449_cast_fp16)[name = tensor<string, []>("op_3562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3563_cast_fp16 = softmax(axis = var_3426, x = aw_451_cast_fp16)[name = tensor<string, []>("op_3563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3564_cast_fp16 = softmax(axis = var_3426, x = aw_453_cast_fp16)[name = tensor<string, []>("op_3564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3565_cast_fp16 = softmax(axis = var_3426, x = aw_455_cast_fp16)[name = tensor<string, []>("op_3565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3566_cast_fp16 = softmax(axis = var_3426, x = aw_457_cast_fp16)[name = tensor<string, []>("op_3566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3567_cast_fp16 = softmax(axis = var_3426, x = aw_459_cast_fp16)[name = tensor<string, []>("op_3567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3568_cast_fp16 = softmax(axis = var_3426, x = aw_461_cast_fp16)[name = tensor<string, []>("op_3568_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3569_cast_fp16 = softmax(axis = var_3426, x = aw_463_cast_fp16)[name = tensor<string, []>("op_3569_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3570_cast_fp16 = softmax(axis = var_3426, x = aw_465_cast_fp16)[name = tensor<string, []>("op_3570_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3571_cast_fp16 = softmax(axis = var_3426, x = aw_467_cast_fp16)[name = tensor<string, []>("op_3571_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3572_cast_fp16 = softmax(axis = var_3426, x = aw_469_cast_fp16)[name = tensor<string, []>("op_3572_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3573_cast_fp16 = softmax(axis = var_3426, x = aw_471_cast_fp16)[name = tensor<string, []>("op_3573_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3574_cast_fp16 = softmax(axis = var_3426, x = aw_473_cast_fp16)[name = tensor<string, []>("op_3574_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3575_cast_fp16 = softmax(axis = var_3426, x = aw_475_cast_fp16)[name = tensor<string, []>("op_3575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3576_cast_fp16 = softmax(axis = var_3426, x = aw_477_cast_fp16)[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3577_cast_fp16 = softmax(axis = var_3426, x = aw_479_cast_fp16)[name = tensor<string, []>("op_3577_cast_fp16")];
+            tensor<string, []> var_3579_equation_0 = const()[name = tensor<string, []>("op_3579_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3579_cast_fp16 = einsum(equation = var_3579_equation_0, values = (var_3513_cast_fp16_0, var_3562_cast_fp16))[name = tensor<string, []>("op_3579_cast_fp16")];
+            tensor<string, []> var_3581_equation_0 = const()[name = tensor<string, []>("op_3581_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3581_cast_fp16 = einsum(equation = var_3581_equation_0, values = (var_3513_cast_fp16_1, var_3563_cast_fp16))[name = tensor<string, []>("op_3581_cast_fp16")];
+            tensor<string, []> var_3583_equation_0 = const()[name = tensor<string, []>("op_3583_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3583_cast_fp16 = einsum(equation = var_3583_equation_0, values = (var_3513_cast_fp16_2, var_3564_cast_fp16))[name = tensor<string, []>("op_3583_cast_fp16")];
+            tensor<string, []> var_3585_equation_0 = const()[name = tensor<string, []>("op_3585_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3585_cast_fp16 = einsum(equation = var_3585_equation_0, values = (var_3513_cast_fp16_3, var_3565_cast_fp16))[name = tensor<string, []>("op_3585_cast_fp16")];
+            tensor<string, []> var_3587_equation_0 = const()[name = tensor<string, []>("op_3587_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3587_cast_fp16 = einsum(equation = var_3587_equation_0, values = (var_3513_cast_fp16_4, var_3566_cast_fp16))[name = tensor<string, []>("op_3587_cast_fp16")];
+            tensor<string, []> var_3589_equation_0 = const()[name = tensor<string, []>("op_3589_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3589_cast_fp16 = einsum(equation = var_3589_equation_0, values = (var_3513_cast_fp16_5, var_3567_cast_fp16))[name = tensor<string, []>("op_3589_cast_fp16")];
+            tensor<string, []> var_3591_equation_0 = const()[name = tensor<string, []>("op_3591_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3591_cast_fp16 = einsum(equation = var_3591_equation_0, values = (var_3513_cast_fp16_6, var_3568_cast_fp16))[name = tensor<string, []>("op_3591_cast_fp16")];
+            tensor<string, []> var_3593_equation_0 = const()[name = tensor<string, []>("op_3593_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3593_cast_fp16 = einsum(equation = var_3593_equation_0, values = (var_3513_cast_fp16_7, var_3569_cast_fp16))[name = tensor<string, []>("op_3593_cast_fp16")];
+            tensor<string, []> var_3595_equation_0 = const()[name = tensor<string, []>("op_3595_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3595_cast_fp16 = einsum(equation = var_3595_equation_0, values = (var_3513_cast_fp16_8, var_3570_cast_fp16))[name = tensor<string, []>("op_3595_cast_fp16")];
+            tensor<string, []> var_3597_equation_0 = const()[name = tensor<string, []>("op_3597_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3597_cast_fp16 = einsum(equation = var_3597_equation_0, values = (var_3513_cast_fp16_9, var_3571_cast_fp16))[name = tensor<string, []>("op_3597_cast_fp16")];
+            tensor<string, []> var_3599_equation_0 = const()[name = tensor<string, []>("op_3599_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3599_cast_fp16 = einsum(equation = var_3599_equation_0, values = (var_3513_cast_fp16_10, var_3572_cast_fp16))[name = tensor<string, []>("op_3599_cast_fp16")];
+            tensor<string, []> var_3601_equation_0 = const()[name = tensor<string, []>("op_3601_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3601_cast_fp16 = einsum(equation = var_3601_equation_0, values = (var_3513_cast_fp16_11, var_3573_cast_fp16))[name = tensor<string, []>("op_3601_cast_fp16")];
+            tensor<string, []> var_3603_equation_0 = const()[name = tensor<string, []>("op_3603_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3603_cast_fp16 = einsum(equation = var_3603_equation_0, values = (var_3513_cast_fp16_12, var_3574_cast_fp16))[name = tensor<string, []>("op_3603_cast_fp16")];
+            tensor<string, []> var_3605_equation_0 = const()[name = tensor<string, []>("op_3605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3605_cast_fp16 = einsum(equation = var_3605_equation_0, values = (var_3513_cast_fp16_13, var_3575_cast_fp16))[name = tensor<string, []>("op_3605_cast_fp16")];
+            tensor<string, []> var_3607_equation_0 = const()[name = tensor<string, []>("op_3607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3607_cast_fp16 = einsum(equation = var_3607_equation_0, values = (var_3513_cast_fp16_14, var_3576_cast_fp16))[name = tensor<string, []>("op_3607_cast_fp16")];
+            tensor<string, []> var_3609_equation_0 = const()[name = tensor<string, []>("op_3609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3609_cast_fp16 = einsum(equation = var_3609_equation_0, values = (var_3513_cast_fp16_15, var_3577_cast_fp16))[name = tensor<string, []>("op_3609_cast_fp16")];
+            tensor<bool, []> input_145_interleave_0 = const()[name = tensor<string, []>("input_145_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_145_cast_fp16 = concat(axis = var_3426, interleave = input_145_interleave_0, values = (var_3579_cast_fp16, var_3581_cast_fp16, var_3583_cast_fp16, var_3585_cast_fp16, var_3587_cast_fp16, var_3589_cast_fp16, var_3591_cast_fp16, var_3593_cast_fp16, var_3595_cast_fp16, var_3597_cast_fp16, var_3599_cast_fp16, var_3601_cast_fp16, var_3603_cast_fp16, var_3605_cast_fp16, var_3607_cast_fp16, var_3609_cast_fp16))[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<string, []> var_3618_pad_type_0 = const()[name = tensor<string, []>("op_3618_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3618_strides_0 = const()[name = tensor<string, []>("op_3618_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3618_pad_0 = const()[name = tensor<string, []>("op_3618_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3618_dilations_0 = const()[name = tensor<string, []>("op_3618_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3618_groups_0 = const()[name = tensor<string, []>("op_3618_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368838592)))];
+            tensor<fp16, [1024]> blocks_14_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370935808)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3618_cast_fp16 = conv(bias = blocks_14_attn_out_bias_to_fp16, dilations = var_3618_dilations_0, groups = var_3618_groups_0, pad = var_3618_pad_0, pad_type = var_3618_pad_type_0, strides = var_3618_strides_0, weight = blocks_14_attn_out_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("op_3618_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = var_3618_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> input_147_axes_0 = const()[name = tensor<string, []>("input_147_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_147_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370937920)))];
+            tensor<fp16, [1024]> input_147_beta_0_to_fp16 = const()[name = tensor<string, []>("input_147_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370940032)))];
+            tensor<fp16, []> var_3628_to_fp16 = const()[name = tensor<string, []>("op_3628_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_147_cast_fp16 = layer_norm(axes = input_147_axes_0, beta = input_147_beta_0_to_fp16, epsilon = var_3628_to_fp16, gamma = input_147_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<string, []> input_149_pad_type_0 = const()[name = tensor<string, []>("input_149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_149_strides_0 = const()[name = tensor<string, []>("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_149_pad_0 = const()[name = tensor<string, []>("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_149_dilations_0 = const()[name = tensor<string, []>("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_149_groups_0 = const()[name = tensor<string, []>("input_149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_14_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370942144)))];
+            tensor<fp16, [4096]> blocks_14_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(379330816)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_149_cast_fp16 = conv(bias = blocks_14_mlp_0_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = blocks_14_mlp_0_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<string, []> input_151_mode_0 = const()[name = tensor<string, []>("input_151_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<string, []> var_3654_pad_type_0 = const()[name = tensor<string, []>("op_3654_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3654_strides_0 = const()[name = tensor<string, []>("op_3654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3654_pad_0 = const()[name = tensor<string, []>("op_3654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3654_dilations_0 = const()[name = tensor<string, []>("op_3654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3654_groups_0 = const()[name = tensor<string, []>("op_3654_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_14_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(379339072)))];
+            tensor<fp16, [1024]> blocks_14_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387727744)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3654_cast_fp16 = conv(bias = blocks_14_mlp_2_bias_to_fp16, dilations = var_3654_dilations_0, groups = var_3654_groups_0, pad = var_3654_pad_0, pad_type = var_3654_pad_type_0, strides = var_3654_strides_0, weight = blocks_14_mlp_2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("op_3654_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = var_3654_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_3663 = const()[name = tensor<string, []>("op_3663"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_153_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_153_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387729856)))];
+            tensor<fp16, [1024]> input_153_beta_0_to_fp16 = const()[name = tensor<string, []>("input_153_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387731968)))];
+            tensor<fp16, []> var_3679_to_fp16 = const()[name = tensor<string, []>("op_3679_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, beta = input_153_beta_0_to_fp16, epsilon = var_3679_to_fp16, gamma = input_153_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<string, []> q_31_pad_type_0 = const()[name = tensor<string, []>("q_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_31_strides_0 = const()[name = tensor<string, []>("q_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_31_pad_0 = const()[name = tensor<string, []>("q_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_31_dilations_0 = const()[name = tensor<string, []>("q_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_31_groups_0 = const()[name = tensor<string, []>("q_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3714_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3714_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387734080)))];
+            tensor<fp16, [1024]> var_3714_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3714_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(389831296)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3714_cast_fp16 = conv(bias = var_3714_bias_0_to_fp16, dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = var_3714_weight_0_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_3714_cast_fp16")];
+            tensor<string, []> k_31_pad_type_0 = const()[name = tensor<string, []>("k_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_31_strides_0 = const()[name = tensor<string, []>("k_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_31_pad_0 = const()[name = tensor<string, []>("k_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_31_dilations_0 = const()[name = tensor<string, []>("k_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_31_groups_0 = const()[name = tensor<string, []>("k_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(389833408)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_31_cast_fp16 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = blocks_15_attn_key_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("k_31_cast_fp16")];
+            tensor<string, []> var_3712_pad_type_0 = const()[name = tensor<string, []>("op_3712_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3712_strides_0 = const()[name = tensor<string, []>("op_3712_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3712_pad_0 = const()[name = tensor<string, []>("op_3712_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3712_dilations_0 = const()[name = tensor<string, []>("op_3712_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3712_groups_0 = const()[name = tensor<string, []>("op_3712_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(391930624)))];
+            tensor<fp16, [1024]> blocks_15_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394027840)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3712_cast_fp16 = conv(bias = blocks_15_attn_value_bias_to_fp16, dilations = var_3712_dilations_0, groups = var_3712_groups_0, pad = var_3712_pad_0, pad_type = var_3712_pad_type_0, strides = var_3712_strides_0, weight = blocks_15_attn_value_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_3712_cast_fp16")];
+            tensor<int32, [16]> tile_45 = const()[name = tensor<string, []>("tile_45"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3715_axis_0 = const()[name = tensor<string, []>("op_3715_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_15 = split(axis = var_3715_axis_0, split_sizes = tile_45, x = var_3714_cast_fp16)[name = tensor<string, []>("op_3715_cast_fp16")];
+            tensor<int32, [4]> var_3732_perm_0 = const()[name = tensor<string, []>("op_3732_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_46 = const()[name = tensor<string, []>("tile_46"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3733_axis_0 = const()[name = tensor<string, []>("op_3733_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3732_cast_fp16 = transpose(perm = var_3732_perm_0, x = k_31_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_15 = split(axis = var_3733_axis_0, split_sizes = tile_46, x = var_3732_cast_fp16)[name = tensor<string, []>("op_3733_cast_fp16")];
+            tensor<int32, [16]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3750_axis_0 = const()[name = tensor<string, []>("op_3750_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_15 = split(axis = var_3750_axis_0, split_sizes = tile_47, x = var_3712_cast_fp16)[name = tensor<string, []>("op_3750_cast_fp16")];
+            tensor<string, []> aw_481_equation_0 = const()[name = tensor<string, []>("aw_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_481_cast_fp16 = einsum(equation = aw_481_equation_0, values = (var_3733_cast_fp16_0, var_3715_cast_fp16_0))[name = tensor<string, []>("aw_481_cast_fp16")];
+            tensor<string, []> aw_483_equation_0 = const()[name = tensor<string, []>("aw_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_483_cast_fp16 = einsum(equation = aw_483_equation_0, values = (var_3733_cast_fp16_1, var_3715_cast_fp16_1))[name = tensor<string, []>("aw_483_cast_fp16")];
+            tensor<string, []> aw_485_equation_0 = const()[name = tensor<string, []>("aw_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_485_cast_fp16 = einsum(equation = aw_485_equation_0, values = (var_3733_cast_fp16_2, var_3715_cast_fp16_2))[name = tensor<string, []>("aw_485_cast_fp16")];
+            tensor<string, []> aw_487_equation_0 = const()[name = tensor<string, []>("aw_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_487_cast_fp16 = einsum(equation = aw_487_equation_0, values = (var_3733_cast_fp16_3, var_3715_cast_fp16_3))[name = tensor<string, []>("aw_487_cast_fp16")];
+            tensor<string, []> aw_489_equation_0 = const()[name = tensor<string, []>("aw_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_489_cast_fp16 = einsum(equation = aw_489_equation_0, values = (var_3733_cast_fp16_4, var_3715_cast_fp16_4))[name = tensor<string, []>("aw_489_cast_fp16")];
+            tensor<string, []> aw_491_equation_0 = const()[name = tensor<string, []>("aw_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_491_cast_fp16 = einsum(equation = aw_491_equation_0, values = (var_3733_cast_fp16_5, var_3715_cast_fp16_5))[name = tensor<string, []>("aw_491_cast_fp16")];
+            tensor<string, []> aw_493_equation_0 = const()[name = tensor<string, []>("aw_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_493_cast_fp16 = einsum(equation = aw_493_equation_0, values = (var_3733_cast_fp16_6, var_3715_cast_fp16_6))[name = tensor<string, []>("aw_493_cast_fp16")];
+            tensor<string, []> aw_495_equation_0 = const()[name = tensor<string, []>("aw_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_495_cast_fp16 = einsum(equation = aw_495_equation_0, values = (var_3733_cast_fp16_7, var_3715_cast_fp16_7))[name = tensor<string, []>("aw_495_cast_fp16")];
+            tensor<string, []> aw_497_equation_0 = const()[name = tensor<string, []>("aw_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_497_cast_fp16 = einsum(equation = aw_497_equation_0, values = (var_3733_cast_fp16_8, var_3715_cast_fp16_8))[name = tensor<string, []>("aw_497_cast_fp16")];
+            tensor<string, []> aw_499_equation_0 = const()[name = tensor<string, []>("aw_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_499_cast_fp16 = einsum(equation = aw_499_equation_0, values = (var_3733_cast_fp16_9, var_3715_cast_fp16_9))[name = tensor<string, []>("aw_499_cast_fp16")];
+            tensor<string, []> aw_501_equation_0 = const()[name = tensor<string, []>("aw_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_501_cast_fp16 = einsum(equation = aw_501_equation_0, values = (var_3733_cast_fp16_10, var_3715_cast_fp16_10))[name = tensor<string, []>("aw_501_cast_fp16")];
+            tensor<string, []> aw_503_equation_0 = const()[name = tensor<string, []>("aw_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_503_cast_fp16 = einsum(equation = aw_503_equation_0, values = (var_3733_cast_fp16_11, var_3715_cast_fp16_11))[name = tensor<string, []>("aw_503_cast_fp16")];
+            tensor<string, []> aw_505_equation_0 = const()[name = tensor<string, []>("aw_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_505_cast_fp16 = einsum(equation = aw_505_equation_0, values = (var_3733_cast_fp16_12, var_3715_cast_fp16_12))[name = tensor<string, []>("aw_505_cast_fp16")];
+            tensor<string, []> aw_507_equation_0 = const()[name = tensor<string, []>("aw_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_507_cast_fp16 = einsum(equation = aw_507_equation_0, values = (var_3733_cast_fp16_13, var_3715_cast_fp16_13))[name = tensor<string, []>("aw_507_cast_fp16")];
+            tensor<string, []> aw_509_equation_0 = const()[name = tensor<string, []>("aw_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_509_cast_fp16 = einsum(equation = aw_509_equation_0, values = (var_3733_cast_fp16_14, var_3715_cast_fp16_14))[name = tensor<string, []>("aw_509_cast_fp16")];
+            tensor<string, []> aw_511_equation_0 = const()[name = tensor<string, []>("aw_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_511_cast_fp16 = einsum(equation = aw_511_equation_0, values = (var_3733_cast_fp16_15, var_3715_cast_fp16_15))[name = tensor<string, []>("aw_511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3799_cast_fp16 = softmax(axis = var_3663, x = aw_481_cast_fp16)[name = tensor<string, []>("op_3799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3800_cast_fp16 = softmax(axis = var_3663, x = aw_483_cast_fp16)[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3801_cast_fp16 = softmax(axis = var_3663, x = aw_485_cast_fp16)[name = tensor<string, []>("op_3801_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3802_cast_fp16 = softmax(axis = var_3663, x = aw_487_cast_fp16)[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3803_cast_fp16 = softmax(axis = var_3663, x = aw_489_cast_fp16)[name = tensor<string, []>("op_3803_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3804_cast_fp16 = softmax(axis = var_3663, x = aw_491_cast_fp16)[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3805_cast_fp16 = softmax(axis = var_3663, x = aw_493_cast_fp16)[name = tensor<string, []>("op_3805_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3806_cast_fp16 = softmax(axis = var_3663, x = aw_495_cast_fp16)[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3807_cast_fp16 = softmax(axis = var_3663, x = aw_497_cast_fp16)[name = tensor<string, []>("op_3807_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3808_cast_fp16 = softmax(axis = var_3663, x = aw_499_cast_fp16)[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3809_cast_fp16 = softmax(axis = var_3663, x = aw_501_cast_fp16)[name = tensor<string, []>("op_3809_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3810_cast_fp16 = softmax(axis = var_3663, x = aw_503_cast_fp16)[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3811_cast_fp16 = softmax(axis = var_3663, x = aw_505_cast_fp16)[name = tensor<string, []>("op_3811_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3812_cast_fp16 = softmax(axis = var_3663, x = aw_507_cast_fp16)[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3813_cast_fp16 = softmax(axis = var_3663, x = aw_509_cast_fp16)[name = tensor<string, []>("op_3813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3814_cast_fp16 = softmax(axis = var_3663, x = aw_511_cast_fp16)[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3750_cast_fp16_0, var_3799_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3750_cast_fp16_1, var_3800_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3750_cast_fp16_2, var_3801_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3750_cast_fp16_3, var_3802_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3750_cast_fp16_4, var_3803_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3750_cast_fp16_5, var_3804_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3750_cast_fp16_6, var_3805_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3750_cast_fp16_7, var_3806_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3750_cast_fp16_8, var_3807_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3750_cast_fp16_9, var_3808_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3750_cast_fp16_10, var_3809_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<string, []> var_3838_equation_0 = const()[name = tensor<string, []>("op_3838_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3838_cast_fp16 = einsum(equation = var_3838_equation_0, values = (var_3750_cast_fp16_11, var_3810_cast_fp16))[name = tensor<string, []>("op_3838_cast_fp16")];
+            tensor<string, []> var_3840_equation_0 = const()[name = tensor<string, []>("op_3840_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3840_cast_fp16 = einsum(equation = var_3840_equation_0, values = (var_3750_cast_fp16_12, var_3811_cast_fp16))[name = tensor<string, []>("op_3840_cast_fp16")];
+            tensor<string, []> var_3842_equation_0 = const()[name = tensor<string, []>("op_3842_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3842_cast_fp16 = einsum(equation = var_3842_equation_0, values = (var_3750_cast_fp16_13, var_3812_cast_fp16))[name = tensor<string, []>("op_3842_cast_fp16")];
+            tensor<string, []> var_3844_equation_0 = const()[name = tensor<string, []>("op_3844_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3844_cast_fp16 = einsum(equation = var_3844_equation_0, values = (var_3750_cast_fp16_14, var_3813_cast_fp16))[name = tensor<string, []>("op_3844_cast_fp16")];
+            tensor<string, []> var_3846_equation_0 = const()[name = tensor<string, []>("op_3846_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3846_cast_fp16 = einsum(equation = var_3846_equation_0, values = (var_3750_cast_fp16_15, var_3814_cast_fp16))[name = tensor<string, []>("op_3846_cast_fp16")];
+            tensor<bool, []> input_155_interleave_0 = const()[name = tensor<string, []>("input_155_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_155_cast_fp16 = concat(axis = var_3663, interleave = input_155_interleave_0, values = (var_3816_cast_fp16, var_3818_cast_fp16, var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16, var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16, var_3836_cast_fp16, var_3838_cast_fp16, var_3840_cast_fp16, var_3842_cast_fp16, var_3844_cast_fp16, var_3846_cast_fp16))[name = tensor<string, []>("input_155_cast_fp16")];
+            tensor<string, []> var_3855_pad_type_0 = const()[name = tensor<string, []>("op_3855_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3855_strides_0 = const()[name = tensor<string, []>("op_3855_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3855_pad_0 = const()[name = tensor<string, []>("op_3855_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3855_dilations_0 = const()[name = tensor<string, []>("op_3855_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3855_groups_0 = const()[name = tensor<string, []>("op_3855_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394029952)))];
+            tensor<fp16, [1024]> blocks_15_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396127168)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3855_cast_fp16 = conv(bias = blocks_15_attn_out_bias_to_fp16, dilations = var_3855_dilations_0, groups = var_3855_groups_0, pad = var_3855_pad_0, pad_type = var_3855_pad_type_0, strides = var_3855_strides_0, weight = blocks_15_attn_out_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("op_3855_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = var_3855_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> input_157_axes_0 = const()[name = tensor<string, []>("input_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_157_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_157_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396129280)))];
+            tensor<fp16, [1024]> input_157_beta_0_to_fp16 = const()[name = tensor<string, []>("input_157_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396131392)))];
+            tensor<fp16, []> var_3865_to_fp16 = const()[name = tensor<string, []>("op_3865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_157_cast_fp16 = layer_norm(axes = input_157_axes_0, beta = input_157_beta_0_to_fp16, epsilon = var_3865_to_fp16, gamma = input_157_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
+            tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_15_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396133504)))];
+            tensor<fp16, [4096]> blocks_15_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(404522176)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_159_cast_fp16 = conv(bias = blocks_15_mlp_0_bias_to_fp16, dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = blocks_15_mlp_0_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
+            tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
+            tensor<string, []> var_3891_pad_type_0 = const()[name = tensor<string, []>("op_3891_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3891_strides_0 = const()[name = tensor<string, []>("op_3891_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3891_pad_0 = const()[name = tensor<string, []>("op_3891_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3891_dilations_0 = const()[name = tensor<string, []>("op_3891_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3891_groups_0 = const()[name = tensor<string, []>("op_3891_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_15_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(404530432)))];
+            tensor<fp16, [1024]> blocks_15_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412919104)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3891_cast_fp16 = conv(bias = blocks_15_mlp_2_bias_to_fp16, dilations = var_3891_dilations_0, groups = var_3891_groups_0, pad = var_3891_pad_0, pad_type = var_3891_pad_type_0, strides = var_3891_strides_0, weight = blocks_15_mlp_2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("op_3891_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_3891_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, []> var_3900 = const()[name = tensor<string, []>("op_3900"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412921216)))];
+            tensor<fp16, [1024]> input_163_beta_0_to_fp16 = const()[name = tensor<string, []>("input_163_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412923328)))];
+            tensor<fp16, []> var_3916_to_fp16 = const()[name = tensor<string, []>("op_3916_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_3916_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<string, []> q_33_pad_type_0 = const()[name = tensor<string, []>("q_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_33_strides_0 = const()[name = tensor<string, []>("q_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_33_pad_0 = const()[name = tensor<string, []>("q_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_33_dilations_0 = const()[name = tensor<string, []>("q_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_33_groups_0 = const()[name = tensor<string, []>("q_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3951_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3951_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412925440)))];
+            tensor<fp16, [1024]> var_3951_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3951_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(415022656)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3951_cast_fp16 = conv(bias = var_3951_bias_0_to_fp16, dilations = q_33_dilations_0, groups = q_33_groups_0, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = q_33_strides_0, weight = var_3951_weight_0_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_3951_cast_fp16")];
+            tensor<string, []> k_33_pad_type_0 = const()[name = tensor<string, []>("k_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_33_strides_0 = const()[name = tensor<string, []>("k_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_33_pad_0 = const()[name = tensor<string, []>("k_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_33_dilations_0 = const()[name = tensor<string, []>("k_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_33_groups_0 = const()[name = tensor<string, []>("k_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(415024768)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_33_cast_fp16 = conv(dilations = k_33_dilations_0, groups = k_33_groups_0, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = k_33_strides_0, weight = blocks_16_attn_key_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
+            tensor<string, []> var_3949_pad_type_0 = const()[name = tensor<string, []>("op_3949_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3949_strides_0 = const()[name = tensor<string, []>("op_3949_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3949_pad_0 = const()[name = tensor<string, []>("op_3949_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3949_dilations_0 = const()[name = tensor<string, []>("op_3949_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3949_groups_0 = const()[name = tensor<string, []>("op_3949_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417121984)))];
+            tensor<fp16, [1024]> blocks_16_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(419219200)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3949_cast_fp16 = conv(bias = blocks_16_attn_value_bias_to_fp16, dilations = var_3949_dilations_0, groups = var_3949_groups_0, pad = var_3949_pad_0, pad_type = var_3949_pad_type_0, strides = var_3949_strides_0, weight = blocks_16_attn_value_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_3949_cast_fp16")];
+            tensor<int32, [16]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3952_axis_0 = const()[name = tensor<string, []>("op_3952_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_15 = split(axis = var_3952_axis_0, split_sizes = tile_48, x = var_3951_cast_fp16)[name = tensor<string, []>("op_3952_cast_fp16")];
+            tensor<int32, [4]> var_3969_perm_0 = const()[name = tensor<string, []>("op_3969_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3970_axis_0 = const()[name = tensor<string, []>("op_3970_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3969_cast_fp16 = transpose(perm = var_3969_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_15 = split(axis = var_3970_axis_0, split_sizes = tile_49, x = var_3969_cast_fp16)[name = tensor<string, []>("op_3970_cast_fp16")];
+            tensor<int32, [16]> tile_50 = const()[name = tensor<string, []>("tile_50"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3987_axis_0 = const()[name = tensor<string, []>("op_3987_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_15 = split(axis = var_3987_axis_0, split_sizes = tile_50, x = var_3949_cast_fp16)[name = tensor<string, []>("op_3987_cast_fp16")];
+            tensor<string, []> aw_513_equation_0 = const()[name = tensor<string, []>("aw_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_513_cast_fp16 = einsum(equation = aw_513_equation_0, values = (var_3970_cast_fp16_0, var_3952_cast_fp16_0))[name = tensor<string, []>("aw_513_cast_fp16")];
+            tensor<string, []> aw_515_equation_0 = const()[name = tensor<string, []>("aw_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_515_cast_fp16 = einsum(equation = aw_515_equation_0, values = (var_3970_cast_fp16_1, var_3952_cast_fp16_1))[name = tensor<string, []>("aw_515_cast_fp16")];
+            tensor<string, []> aw_517_equation_0 = const()[name = tensor<string, []>("aw_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_517_cast_fp16 = einsum(equation = aw_517_equation_0, values = (var_3970_cast_fp16_2, var_3952_cast_fp16_2))[name = tensor<string, []>("aw_517_cast_fp16")];
+            tensor<string, []> aw_519_equation_0 = const()[name = tensor<string, []>("aw_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_519_cast_fp16 = einsum(equation = aw_519_equation_0, values = (var_3970_cast_fp16_3, var_3952_cast_fp16_3))[name = tensor<string, []>("aw_519_cast_fp16")];
+            tensor<string, []> aw_521_equation_0 = const()[name = tensor<string, []>("aw_521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_521_cast_fp16 = einsum(equation = aw_521_equation_0, values = (var_3970_cast_fp16_4, var_3952_cast_fp16_4))[name = tensor<string, []>("aw_521_cast_fp16")];
+            tensor<string, []> aw_523_equation_0 = const()[name = tensor<string, []>("aw_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_523_cast_fp16 = einsum(equation = aw_523_equation_0, values = (var_3970_cast_fp16_5, var_3952_cast_fp16_5))[name = tensor<string, []>("aw_523_cast_fp16")];
+            tensor<string, []> aw_525_equation_0 = const()[name = tensor<string, []>("aw_525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_525_cast_fp16 = einsum(equation = aw_525_equation_0, values = (var_3970_cast_fp16_6, var_3952_cast_fp16_6))[name = tensor<string, []>("aw_525_cast_fp16")];
+            tensor<string, []> aw_527_equation_0 = const()[name = tensor<string, []>("aw_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_527_cast_fp16 = einsum(equation = aw_527_equation_0, values = (var_3970_cast_fp16_7, var_3952_cast_fp16_7))[name = tensor<string, []>("aw_527_cast_fp16")];
+            tensor<string, []> aw_529_equation_0 = const()[name = tensor<string, []>("aw_529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_529_cast_fp16 = einsum(equation = aw_529_equation_0, values = (var_3970_cast_fp16_8, var_3952_cast_fp16_8))[name = tensor<string, []>("aw_529_cast_fp16")];
+            tensor<string, []> aw_531_equation_0 = const()[name = tensor<string, []>("aw_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_531_cast_fp16 = einsum(equation = aw_531_equation_0, values = (var_3970_cast_fp16_9, var_3952_cast_fp16_9))[name = tensor<string, []>("aw_531_cast_fp16")];
+            tensor<string, []> aw_533_equation_0 = const()[name = tensor<string, []>("aw_533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_533_cast_fp16 = einsum(equation = aw_533_equation_0, values = (var_3970_cast_fp16_10, var_3952_cast_fp16_10))[name = tensor<string, []>("aw_533_cast_fp16")];
+            tensor<string, []> aw_535_equation_0 = const()[name = tensor<string, []>("aw_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_535_cast_fp16 = einsum(equation = aw_535_equation_0, values = (var_3970_cast_fp16_11, var_3952_cast_fp16_11))[name = tensor<string, []>("aw_535_cast_fp16")];
+            tensor<string, []> aw_537_equation_0 = const()[name = tensor<string, []>("aw_537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_537_cast_fp16 = einsum(equation = aw_537_equation_0, values = (var_3970_cast_fp16_12, var_3952_cast_fp16_12))[name = tensor<string, []>("aw_537_cast_fp16")];
+            tensor<string, []> aw_539_equation_0 = const()[name = tensor<string, []>("aw_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_539_cast_fp16 = einsum(equation = aw_539_equation_0, values = (var_3970_cast_fp16_13, var_3952_cast_fp16_13))[name = tensor<string, []>("aw_539_cast_fp16")];
+            tensor<string, []> aw_541_equation_0 = const()[name = tensor<string, []>("aw_541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_541_cast_fp16 = einsum(equation = aw_541_equation_0, values = (var_3970_cast_fp16_14, var_3952_cast_fp16_14))[name = tensor<string, []>("aw_541_cast_fp16")];
+            tensor<string, []> aw_543_equation_0 = const()[name = tensor<string, []>("aw_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_543_cast_fp16 = einsum(equation = aw_543_equation_0, values = (var_3970_cast_fp16_15, var_3952_cast_fp16_15))[name = tensor<string, []>("aw_543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4036_cast_fp16 = softmax(axis = var_3900, x = aw_513_cast_fp16)[name = tensor<string, []>("op_4036_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4037_cast_fp16 = softmax(axis = var_3900, x = aw_515_cast_fp16)[name = tensor<string, []>("op_4037_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4038_cast_fp16 = softmax(axis = var_3900, x = aw_517_cast_fp16)[name = tensor<string, []>("op_4038_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4039_cast_fp16 = softmax(axis = var_3900, x = aw_519_cast_fp16)[name = tensor<string, []>("op_4039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4040_cast_fp16 = softmax(axis = var_3900, x = aw_521_cast_fp16)[name = tensor<string, []>("op_4040_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4041_cast_fp16 = softmax(axis = var_3900, x = aw_523_cast_fp16)[name = tensor<string, []>("op_4041_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4042_cast_fp16 = softmax(axis = var_3900, x = aw_525_cast_fp16)[name = tensor<string, []>("op_4042_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4043_cast_fp16 = softmax(axis = var_3900, x = aw_527_cast_fp16)[name = tensor<string, []>("op_4043_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4044_cast_fp16 = softmax(axis = var_3900, x = aw_529_cast_fp16)[name = tensor<string, []>("op_4044_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4045_cast_fp16 = softmax(axis = var_3900, x = aw_531_cast_fp16)[name = tensor<string, []>("op_4045_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4046_cast_fp16 = softmax(axis = var_3900, x = aw_533_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4047_cast_fp16 = softmax(axis = var_3900, x = aw_535_cast_fp16)[name = tensor<string, []>("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4048_cast_fp16 = softmax(axis = var_3900, x = aw_537_cast_fp16)[name = tensor<string, []>("op_4048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4049_cast_fp16 = softmax(axis = var_3900, x = aw_539_cast_fp16)[name = tensor<string, []>("op_4049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4050_cast_fp16 = softmax(axis = var_3900, x = aw_541_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4051_cast_fp16 = softmax(axis = var_3900, x = aw_543_cast_fp16)[name = tensor<string, []>("op_4051_cast_fp16")];
+            tensor<string, []> var_4053_equation_0 = const()[name = tensor<string, []>("op_4053_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4053_cast_fp16 = einsum(equation = var_4053_equation_0, values = (var_3987_cast_fp16_0, var_4036_cast_fp16))[name = tensor<string, []>("op_4053_cast_fp16")];
+            tensor<string, []> var_4055_equation_0 = const()[name = tensor<string, []>("op_4055_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4055_cast_fp16 = einsum(equation = var_4055_equation_0, values = (var_3987_cast_fp16_1, var_4037_cast_fp16))[name = tensor<string, []>("op_4055_cast_fp16")];
+            tensor<string, []> var_4057_equation_0 = const()[name = tensor<string, []>("op_4057_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4057_cast_fp16 = einsum(equation = var_4057_equation_0, values = (var_3987_cast_fp16_2, var_4038_cast_fp16))[name = tensor<string, []>("op_4057_cast_fp16")];
+            tensor<string, []> var_4059_equation_0 = const()[name = tensor<string, []>("op_4059_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4059_cast_fp16 = einsum(equation = var_4059_equation_0, values = (var_3987_cast_fp16_3, var_4039_cast_fp16))[name = tensor<string, []>("op_4059_cast_fp16")];
+            tensor<string, []> var_4061_equation_0 = const()[name = tensor<string, []>("op_4061_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4061_cast_fp16 = einsum(equation = var_4061_equation_0, values = (var_3987_cast_fp16_4, var_4040_cast_fp16))[name = tensor<string, []>("op_4061_cast_fp16")];
+            tensor<string, []> var_4063_equation_0 = const()[name = tensor<string, []>("op_4063_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4063_cast_fp16 = einsum(equation = var_4063_equation_0, values = (var_3987_cast_fp16_5, var_4041_cast_fp16))[name = tensor<string, []>("op_4063_cast_fp16")];
+            tensor<string, []> var_4065_equation_0 = const()[name = tensor<string, []>("op_4065_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4065_cast_fp16 = einsum(equation = var_4065_equation_0, values = (var_3987_cast_fp16_6, var_4042_cast_fp16))[name = tensor<string, []>("op_4065_cast_fp16")];
+            tensor<string, []> var_4067_equation_0 = const()[name = tensor<string, []>("op_4067_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3987_cast_fp16_7, var_4043_cast_fp16))[name = tensor<string, []>("op_4067_cast_fp16")];
+            tensor<string, []> var_4069_equation_0 = const()[name = tensor<string, []>("op_4069_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4069_cast_fp16 = einsum(equation = var_4069_equation_0, values = (var_3987_cast_fp16_8, var_4044_cast_fp16))[name = tensor<string, []>("op_4069_cast_fp16")];
+            tensor<string, []> var_4071_equation_0 = const()[name = tensor<string, []>("op_4071_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_3987_cast_fp16_9, var_4045_cast_fp16))[name = tensor<string, []>("op_4071_cast_fp16")];
+            tensor<string, []> var_4073_equation_0 = const()[name = tensor<string, []>("op_4073_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4073_cast_fp16 = einsum(equation = var_4073_equation_0, values = (var_3987_cast_fp16_10, var_4046_cast_fp16))[name = tensor<string, []>("op_4073_cast_fp16")];
+            tensor<string, []> var_4075_equation_0 = const()[name = tensor<string, []>("op_4075_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_3987_cast_fp16_11, var_4047_cast_fp16))[name = tensor<string, []>("op_4075_cast_fp16")];
+            tensor<string, []> var_4077_equation_0 = const()[name = tensor<string, []>("op_4077_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4077_cast_fp16 = einsum(equation = var_4077_equation_0, values = (var_3987_cast_fp16_12, var_4048_cast_fp16))[name = tensor<string, []>("op_4077_cast_fp16")];
+            tensor<string, []> var_4079_equation_0 = const()[name = tensor<string, []>("op_4079_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_3987_cast_fp16_13, var_4049_cast_fp16))[name = tensor<string, []>("op_4079_cast_fp16")];
+            tensor<string, []> var_4081_equation_0 = const()[name = tensor<string, []>("op_4081_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4081_cast_fp16 = einsum(equation = var_4081_equation_0, values = (var_3987_cast_fp16_14, var_4050_cast_fp16))[name = tensor<string, []>("op_4081_cast_fp16")];
+            tensor<string, []> var_4083_equation_0 = const()[name = tensor<string, []>("op_4083_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_3987_cast_fp16_15, var_4051_cast_fp16))[name = tensor<string, []>("op_4083_cast_fp16")];
+            tensor<bool, []> input_165_interleave_0 = const()[name = tensor<string, []>("input_165_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_165_cast_fp16 = concat(axis = var_3900, interleave = input_165_interleave_0, values = (var_4053_cast_fp16, var_4055_cast_fp16, var_4057_cast_fp16, var_4059_cast_fp16, var_4061_cast_fp16, var_4063_cast_fp16, var_4065_cast_fp16, var_4067_cast_fp16, var_4069_cast_fp16, var_4071_cast_fp16, var_4073_cast_fp16, var_4075_cast_fp16, var_4077_cast_fp16, var_4079_cast_fp16, var_4081_cast_fp16, var_4083_cast_fp16))[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<string, []> var_4092_pad_type_0 = const()[name = tensor<string, []>("op_4092_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4092_strides_0 = const()[name = tensor<string, []>("op_4092_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4092_pad_0 = const()[name = tensor<string, []>("op_4092_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4092_dilations_0 = const()[name = tensor<string, []>("op_4092_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4092_groups_0 = const()[name = tensor<string, []>("op_4092_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(419221312)))];
+            tensor<fp16, [1024]> blocks_16_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421318528)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4092_cast_fp16 = conv(bias = blocks_16_attn_out_bias_to_fp16, dilations = var_4092_dilations_0, groups = var_4092_groups_0, pad = var_4092_pad_0, pad_type = var_4092_pad_type_0, strides = var_4092_strides_0, weight = blocks_16_attn_out_weight_to_fp16, x = input_165_cast_fp16)[name = tensor<string, []>("op_4092_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = var_4092_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, [1]> input_167_axes_0 = const()[name = tensor<string, []>("input_167_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_167_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_167_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421320640)))];
+            tensor<fp16, [1024]> input_167_beta_0_to_fp16 = const()[name = tensor<string, []>("input_167_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421322752)))];
+            tensor<fp16, []> var_4102_to_fp16 = const()[name = tensor<string, []>("op_4102_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_167_cast_fp16 = layer_norm(axes = input_167_axes_0, beta = input_167_beta_0_to_fp16, epsilon = var_4102_to_fp16, gamma = input_167_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<string, []> input_169_pad_type_0 = const()[name = tensor<string, []>("input_169_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_169_strides_0 = const()[name = tensor<string, []>("input_169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_169_pad_0 = const()[name = tensor<string, []>("input_169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_169_dilations_0 = const()[name = tensor<string, []>("input_169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_169_groups_0 = const()[name = tensor<string, []>("input_169_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_16_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421324864)))];
+            tensor<fp16, [4096]> blocks_16_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(429713536)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_169_cast_fp16 = conv(bias = blocks_16_mlp_0_bias_to_fp16, dilations = input_169_dilations_0, groups = input_169_groups_0, pad = input_169_pad_0, pad_type = input_169_pad_type_0, strides = input_169_strides_0, weight = blocks_16_mlp_0_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<string, []> input_171_mode_0 = const()[name = tensor<string, []>("input_171_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_171_cast_fp16 = gelu(mode = input_171_mode_0, x = input_169_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<string, []> var_4128_pad_type_0 = const()[name = tensor<string, []>("op_4128_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4128_strides_0 = const()[name = tensor<string, []>("op_4128_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4128_pad_0 = const()[name = tensor<string, []>("op_4128_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4128_dilations_0 = const()[name = tensor<string, []>("op_4128_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4128_groups_0 = const()[name = tensor<string, []>("op_4128_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_16_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(429721792)))];
+            tensor<fp16, [1024]> blocks_16_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438110464)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4128_cast_fp16 = conv(bias = blocks_16_mlp_2_bias_to_fp16, dilations = var_4128_dilations_0, groups = var_4128_groups_0, pad = var_4128_pad_0, pad_type = var_4128_pad_type_0, strides = var_4128_strides_0, weight = blocks_16_mlp_2_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("op_4128_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = var_4128_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, []> var_4137 = const()[name = tensor<string, []>("op_4137"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_173_axes_0 = const()[name = tensor<string, []>("input_173_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_173_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_173_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438112576)))];
+            tensor<fp16, [1024]> input_173_beta_0_to_fp16 = const()[name = tensor<string, []>("input_173_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438114688)))];
+            tensor<fp16, []> var_4153_to_fp16 = const()[name = tensor<string, []>("op_4153_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_173_cast_fp16 = layer_norm(axes = input_173_axes_0, beta = input_173_beta_0_to_fp16, epsilon = var_4153_to_fp16, gamma = input_173_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<string, []> q_35_pad_type_0 = const()[name = tensor<string, []>("q_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_35_strides_0 = const()[name = tensor<string, []>("q_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_35_pad_0 = const()[name = tensor<string, []>("q_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_35_dilations_0 = const()[name = tensor<string, []>("q_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_35_groups_0 = const()[name = tensor<string, []>("q_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4188_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4188_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438116800)))];
+            tensor<fp16, [1024]> var_4188_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4188_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(440214016)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4188_cast_fp16 = conv(bias = var_4188_bias_0_to_fp16, dilations = q_35_dilations_0, groups = q_35_groups_0, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = q_35_strides_0, weight = var_4188_weight_0_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4188_cast_fp16")];
+            tensor<string, []> k_35_pad_type_0 = const()[name = tensor<string, []>("k_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_35_strides_0 = const()[name = tensor<string, []>("k_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_35_pad_0 = const()[name = tensor<string, []>("k_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_35_dilations_0 = const()[name = tensor<string, []>("k_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_35_groups_0 = const()[name = tensor<string, []>("k_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(440216128)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_35_cast_fp16 = conv(dilations = k_35_dilations_0, groups = k_35_groups_0, pad = k_35_pad_0, pad_type = k_35_pad_type_0, strides = k_35_strides_0, weight = blocks_17_attn_key_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("k_35_cast_fp16")];
+            tensor<string, []> var_4186_pad_type_0 = const()[name = tensor<string, []>("op_4186_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4186_strides_0 = const()[name = tensor<string, []>("op_4186_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4186_pad_0 = const()[name = tensor<string, []>("op_4186_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4186_dilations_0 = const()[name = tensor<string, []>("op_4186_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4186_groups_0 = const()[name = tensor<string, []>("op_4186_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(442313344)))];
+            tensor<fp16, [1024]> blocks_17_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(444410560)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4186_cast_fp16 = conv(bias = blocks_17_attn_value_bias_to_fp16, dilations = var_4186_dilations_0, groups = var_4186_groups_0, pad = var_4186_pad_0, pad_type = var_4186_pad_type_0, strides = var_4186_strides_0, weight = blocks_17_attn_value_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4186_cast_fp16")];
+            tensor<int32, [16]> tile_51 = const()[name = tensor<string, []>("tile_51"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4189_axis_0 = const()[name = tensor<string, []>("op_4189_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_15 = split(axis = var_4189_axis_0, split_sizes = tile_51, x = var_4188_cast_fp16)[name = tensor<string, []>("op_4189_cast_fp16")];
+            tensor<int32, [4]> var_4206_perm_0 = const()[name = tensor<string, []>("op_4206_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4207_axis_0 = const()[name = tensor<string, []>("op_4207_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4206_cast_fp16 = transpose(perm = var_4206_perm_0, x = k_35_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_15 = split(axis = var_4207_axis_0, split_sizes = tile_52, x = var_4206_cast_fp16)[name = tensor<string, []>("op_4207_cast_fp16")];
+            tensor<int32, [16]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4224_axis_0 = const()[name = tensor<string, []>("op_4224_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_15 = split(axis = var_4224_axis_0, split_sizes = tile_53, x = var_4186_cast_fp16)[name = tensor<string, []>("op_4224_cast_fp16")];
+            tensor<string, []> aw_545_equation_0 = const()[name = tensor<string, []>("aw_545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_545_cast_fp16 = einsum(equation = aw_545_equation_0, values = (var_4207_cast_fp16_0, var_4189_cast_fp16_0))[name = tensor<string, []>("aw_545_cast_fp16")];
+            tensor<string, []> aw_547_equation_0 = const()[name = tensor<string, []>("aw_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_547_cast_fp16 = einsum(equation = aw_547_equation_0, values = (var_4207_cast_fp16_1, var_4189_cast_fp16_1))[name = tensor<string, []>("aw_547_cast_fp16")];
+            tensor<string, []> aw_549_equation_0 = const()[name = tensor<string, []>("aw_549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_549_cast_fp16 = einsum(equation = aw_549_equation_0, values = (var_4207_cast_fp16_2, var_4189_cast_fp16_2))[name = tensor<string, []>("aw_549_cast_fp16")];
+            tensor<string, []> aw_551_equation_0 = const()[name = tensor<string, []>("aw_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_551_cast_fp16 = einsum(equation = aw_551_equation_0, values = (var_4207_cast_fp16_3, var_4189_cast_fp16_3))[name = tensor<string, []>("aw_551_cast_fp16")];
+            tensor<string, []> aw_553_equation_0 = const()[name = tensor<string, []>("aw_553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_553_cast_fp16 = einsum(equation = aw_553_equation_0, values = (var_4207_cast_fp16_4, var_4189_cast_fp16_4))[name = tensor<string, []>("aw_553_cast_fp16")];
+            tensor<string, []> aw_555_equation_0 = const()[name = tensor<string, []>("aw_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_555_cast_fp16 = einsum(equation = aw_555_equation_0, values = (var_4207_cast_fp16_5, var_4189_cast_fp16_5))[name = tensor<string, []>("aw_555_cast_fp16")];
+            tensor<string, []> aw_557_equation_0 = const()[name = tensor<string, []>("aw_557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_557_cast_fp16 = einsum(equation = aw_557_equation_0, values = (var_4207_cast_fp16_6, var_4189_cast_fp16_6))[name = tensor<string, []>("aw_557_cast_fp16")];
+            tensor<string, []> aw_559_equation_0 = const()[name = tensor<string, []>("aw_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_559_cast_fp16 = einsum(equation = aw_559_equation_0, values = (var_4207_cast_fp16_7, var_4189_cast_fp16_7))[name = tensor<string, []>("aw_559_cast_fp16")];
+            tensor<string, []> aw_561_equation_0 = const()[name = tensor<string, []>("aw_561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_561_cast_fp16 = einsum(equation = aw_561_equation_0, values = (var_4207_cast_fp16_8, var_4189_cast_fp16_8))[name = tensor<string, []>("aw_561_cast_fp16")];
+            tensor<string, []> aw_563_equation_0 = const()[name = tensor<string, []>("aw_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_563_cast_fp16 = einsum(equation = aw_563_equation_0, values = (var_4207_cast_fp16_9, var_4189_cast_fp16_9))[name = tensor<string, []>("aw_563_cast_fp16")];
+            tensor<string, []> aw_565_equation_0 = const()[name = tensor<string, []>("aw_565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_565_cast_fp16 = einsum(equation = aw_565_equation_0, values = (var_4207_cast_fp16_10, var_4189_cast_fp16_10))[name = tensor<string, []>("aw_565_cast_fp16")];
+            tensor<string, []> aw_567_equation_0 = const()[name = tensor<string, []>("aw_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_567_cast_fp16 = einsum(equation = aw_567_equation_0, values = (var_4207_cast_fp16_11, var_4189_cast_fp16_11))[name = tensor<string, []>("aw_567_cast_fp16")];
+            tensor<string, []> aw_569_equation_0 = const()[name = tensor<string, []>("aw_569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_569_cast_fp16 = einsum(equation = aw_569_equation_0, values = (var_4207_cast_fp16_12, var_4189_cast_fp16_12))[name = tensor<string, []>("aw_569_cast_fp16")];
+            tensor<string, []> aw_571_equation_0 = const()[name = tensor<string, []>("aw_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_571_cast_fp16 = einsum(equation = aw_571_equation_0, values = (var_4207_cast_fp16_13, var_4189_cast_fp16_13))[name = tensor<string, []>("aw_571_cast_fp16")];
+            tensor<string, []> aw_573_equation_0 = const()[name = tensor<string, []>("aw_573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_573_cast_fp16 = einsum(equation = aw_573_equation_0, values = (var_4207_cast_fp16_14, var_4189_cast_fp16_14))[name = tensor<string, []>("aw_573_cast_fp16")];
+            tensor<string, []> aw_575_equation_0 = const()[name = tensor<string, []>("aw_575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_575_cast_fp16 = einsum(equation = aw_575_equation_0, values = (var_4207_cast_fp16_15, var_4189_cast_fp16_15))[name = tensor<string, []>("aw_575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4273_cast_fp16 = softmax(axis = var_4137, x = aw_545_cast_fp16)[name = tensor<string, []>("op_4273_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4274_cast_fp16 = softmax(axis = var_4137, x = aw_547_cast_fp16)[name = tensor<string, []>("op_4274_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4275_cast_fp16 = softmax(axis = var_4137, x = aw_549_cast_fp16)[name = tensor<string, []>("op_4275_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4276_cast_fp16 = softmax(axis = var_4137, x = aw_551_cast_fp16)[name = tensor<string, []>("op_4276_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4277_cast_fp16 = softmax(axis = var_4137, x = aw_553_cast_fp16)[name = tensor<string, []>("op_4277_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4278_cast_fp16 = softmax(axis = var_4137, x = aw_555_cast_fp16)[name = tensor<string, []>("op_4278_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4279_cast_fp16 = softmax(axis = var_4137, x = aw_557_cast_fp16)[name = tensor<string, []>("op_4279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4280_cast_fp16 = softmax(axis = var_4137, x = aw_559_cast_fp16)[name = tensor<string, []>("op_4280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4281_cast_fp16 = softmax(axis = var_4137, x = aw_561_cast_fp16)[name = tensor<string, []>("op_4281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4282_cast_fp16 = softmax(axis = var_4137, x = aw_563_cast_fp16)[name = tensor<string, []>("op_4282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4283_cast_fp16 = softmax(axis = var_4137, x = aw_565_cast_fp16)[name = tensor<string, []>("op_4283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4284_cast_fp16 = softmax(axis = var_4137, x = aw_567_cast_fp16)[name = tensor<string, []>("op_4284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4285_cast_fp16 = softmax(axis = var_4137, x = aw_569_cast_fp16)[name = tensor<string, []>("op_4285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4286_cast_fp16 = softmax(axis = var_4137, x = aw_571_cast_fp16)[name = tensor<string, []>("op_4286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4287_cast_fp16 = softmax(axis = var_4137, x = aw_573_cast_fp16)[name = tensor<string, []>("op_4287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4288_cast_fp16 = softmax(axis = var_4137, x = aw_575_cast_fp16)[name = tensor<string, []>("op_4288_cast_fp16")];
+            tensor<string, []> var_4290_equation_0 = const()[name = tensor<string, []>("op_4290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4290_cast_fp16 = einsum(equation = var_4290_equation_0, values = (var_4224_cast_fp16_0, var_4273_cast_fp16))[name = tensor<string, []>("op_4290_cast_fp16")];
+            tensor<string, []> var_4292_equation_0 = const()[name = tensor<string, []>("op_4292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4292_cast_fp16 = einsum(equation = var_4292_equation_0, values = (var_4224_cast_fp16_1, var_4274_cast_fp16))[name = tensor<string, []>("op_4292_cast_fp16")];
+            tensor<string, []> var_4294_equation_0 = const()[name = tensor<string, []>("op_4294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4294_cast_fp16 = einsum(equation = var_4294_equation_0, values = (var_4224_cast_fp16_2, var_4275_cast_fp16))[name = tensor<string, []>("op_4294_cast_fp16")];
+            tensor<string, []> var_4296_equation_0 = const()[name = tensor<string, []>("op_4296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4296_cast_fp16 = einsum(equation = var_4296_equation_0, values = (var_4224_cast_fp16_3, var_4276_cast_fp16))[name = tensor<string, []>("op_4296_cast_fp16")];
+            tensor<string, []> var_4298_equation_0 = const()[name = tensor<string, []>("op_4298_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4298_cast_fp16 = einsum(equation = var_4298_equation_0, values = (var_4224_cast_fp16_4, var_4277_cast_fp16))[name = tensor<string, []>("op_4298_cast_fp16")];
+            tensor<string, []> var_4300_equation_0 = const()[name = tensor<string, []>("op_4300_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4300_cast_fp16 = einsum(equation = var_4300_equation_0, values = (var_4224_cast_fp16_5, var_4278_cast_fp16))[name = tensor<string, []>("op_4300_cast_fp16")];
+            tensor<string, []> var_4302_equation_0 = const()[name = tensor<string, []>("op_4302_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4302_cast_fp16 = einsum(equation = var_4302_equation_0, values = (var_4224_cast_fp16_6, var_4279_cast_fp16))[name = tensor<string, []>("op_4302_cast_fp16")];
+            tensor<string, []> var_4304_equation_0 = const()[name = tensor<string, []>("op_4304_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4304_cast_fp16 = einsum(equation = var_4304_equation_0, values = (var_4224_cast_fp16_7, var_4280_cast_fp16))[name = tensor<string, []>("op_4304_cast_fp16")];
+            tensor<string, []> var_4306_equation_0 = const()[name = tensor<string, []>("op_4306_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4306_cast_fp16 = einsum(equation = var_4306_equation_0, values = (var_4224_cast_fp16_8, var_4281_cast_fp16))[name = tensor<string, []>("op_4306_cast_fp16")];
+            tensor<string, []> var_4308_equation_0 = const()[name = tensor<string, []>("op_4308_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4308_cast_fp16 = einsum(equation = var_4308_equation_0, values = (var_4224_cast_fp16_9, var_4282_cast_fp16))[name = tensor<string, []>("op_4308_cast_fp16")];
+            tensor<string, []> var_4310_equation_0 = const()[name = tensor<string, []>("op_4310_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4310_cast_fp16 = einsum(equation = var_4310_equation_0, values = (var_4224_cast_fp16_10, var_4283_cast_fp16))[name = tensor<string, []>("op_4310_cast_fp16")];
+            tensor<string, []> var_4312_equation_0 = const()[name = tensor<string, []>("op_4312_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4312_cast_fp16 = einsum(equation = var_4312_equation_0, values = (var_4224_cast_fp16_11, var_4284_cast_fp16))[name = tensor<string, []>("op_4312_cast_fp16")];
+            tensor<string, []> var_4314_equation_0 = const()[name = tensor<string, []>("op_4314_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4314_cast_fp16 = einsum(equation = var_4314_equation_0, values = (var_4224_cast_fp16_12, var_4285_cast_fp16))[name = tensor<string, []>("op_4314_cast_fp16")];
+            tensor<string, []> var_4316_equation_0 = const()[name = tensor<string, []>("op_4316_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4316_cast_fp16 = einsum(equation = var_4316_equation_0, values = (var_4224_cast_fp16_13, var_4286_cast_fp16))[name = tensor<string, []>("op_4316_cast_fp16")];
+            tensor<string, []> var_4318_equation_0 = const()[name = tensor<string, []>("op_4318_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4318_cast_fp16 = einsum(equation = var_4318_equation_0, values = (var_4224_cast_fp16_14, var_4287_cast_fp16))[name = tensor<string, []>("op_4318_cast_fp16")];
+            tensor<string, []> var_4320_equation_0 = const()[name = tensor<string, []>("op_4320_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4320_cast_fp16 = einsum(equation = var_4320_equation_0, values = (var_4224_cast_fp16_15, var_4288_cast_fp16))[name = tensor<string, []>("op_4320_cast_fp16")];
+            tensor<bool, []> input_175_interleave_0 = const()[name = tensor<string, []>("input_175_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_175_cast_fp16 = concat(axis = var_4137, interleave = input_175_interleave_0, values = (var_4290_cast_fp16, var_4292_cast_fp16, var_4294_cast_fp16, var_4296_cast_fp16, var_4298_cast_fp16, var_4300_cast_fp16, var_4302_cast_fp16, var_4304_cast_fp16, var_4306_cast_fp16, var_4308_cast_fp16, var_4310_cast_fp16, var_4312_cast_fp16, var_4314_cast_fp16, var_4316_cast_fp16, var_4318_cast_fp16, var_4320_cast_fp16))[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<string, []> var_4329_pad_type_0 = const()[name = tensor<string, []>("op_4329_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4329_strides_0 = const()[name = tensor<string, []>("op_4329_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4329_pad_0 = const()[name = tensor<string, []>("op_4329_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4329_dilations_0 = const()[name = tensor<string, []>("op_4329_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4329_groups_0 = const()[name = tensor<string, []>("op_4329_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(444412672)))];
+            tensor<fp16, [1024]> blocks_17_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446509888)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4329_cast_fp16 = conv(bias = blocks_17_attn_out_bias_to_fp16, dilations = var_4329_dilations_0, groups = var_4329_groups_0, pad = var_4329_pad_0, pad_type = var_4329_pad_type_0, strides = var_4329_strides_0, weight = blocks_17_attn_out_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("op_4329_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = var_4329_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_177_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_177_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446512000)))];
+            tensor<fp16, [1024]> input_177_beta_0_to_fp16 = const()[name = tensor<string, []>("input_177_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446514112)))];
+            tensor<fp16, []> var_4339_to_fp16 = const()[name = tensor<string, []>("op_4339_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = input_177_beta_0_to_fp16, epsilon = var_4339_to_fp16, gamma = input_177_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_17_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446516224)))];
+            tensor<fp16, [4096]> blocks_17_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(454904896)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_179_cast_fp16 = conv(bias = blocks_17_mlp_0_bias_to_fp16, dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = blocks_17_mlp_0_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<string, []> var_4365_pad_type_0 = const()[name = tensor<string, []>("op_4365_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4365_strides_0 = const()[name = tensor<string, []>("op_4365_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4365_pad_0 = const()[name = tensor<string, []>("op_4365_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4365_dilations_0 = const()[name = tensor<string, []>("op_4365_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4365_groups_0 = const()[name = tensor<string, []>("op_4365_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_17_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(454913152)))];
+            tensor<fp16, [1024]> blocks_17_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463301824)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4365_cast_fp16 = conv(bias = blocks_17_mlp_2_bias_to_fp16, dilations = var_4365_dilations_0, groups = var_4365_groups_0, pad = var_4365_pad_0, pad_type = var_4365_pad_type_0, strides = var_4365_strides_0, weight = blocks_17_mlp_2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("op_4365_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_4365_cast_fp16)[name = tensor<string, []>("inputs_73_cast_fp16")];
+            tensor<int32, []> var_4374 = const()[name = tensor<string, []>("op_4374"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_183_axes_0 = const()[name = tensor<string, []>("input_183_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_183_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_183_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463303936)))];
+            tensor<fp16, [1024]> input_183_beta_0_to_fp16 = const()[name = tensor<string, []>("input_183_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463306048)))];
+            tensor<fp16, []> var_4390_to_fp16 = const()[name = tensor<string, []>("op_4390_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = input_183_beta_0_to_fp16, epsilon = var_4390_to_fp16, gamma = input_183_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<string, []> q_37_pad_type_0 = const()[name = tensor<string, []>("q_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_37_strides_0 = const()[name = tensor<string, []>("q_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_37_pad_0 = const()[name = tensor<string, []>("q_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_37_dilations_0 = const()[name = tensor<string, []>("q_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_37_groups_0 = const()[name = tensor<string, []>("q_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4425_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4425_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463308160)))];
+            tensor<fp16, [1024]> var_4425_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4425_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(465405376)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4425_cast_fp16 = conv(bias = var_4425_bias_0_to_fp16, dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = var_4425_weight_0_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_4425_cast_fp16")];
+            tensor<string, []> k_37_pad_type_0 = const()[name = tensor<string, []>("k_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_37_strides_0 = const()[name = tensor<string, []>("k_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_37_pad_0 = const()[name = tensor<string, []>("k_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_37_dilations_0 = const()[name = tensor<string, []>("k_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_37_groups_0 = const()[name = tensor<string, []>("k_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(465407488)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_37_cast_fp16 = conv(dilations = k_37_dilations_0, groups = k_37_groups_0, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = k_37_strides_0, weight = blocks_18_attn_key_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
+            tensor<string, []> var_4423_pad_type_0 = const()[name = tensor<string, []>("op_4423_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4423_strides_0 = const()[name = tensor<string, []>("op_4423_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4423_pad_0 = const()[name = tensor<string, []>("op_4423_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4423_dilations_0 = const()[name = tensor<string, []>("op_4423_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4423_groups_0 = const()[name = tensor<string, []>("op_4423_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(467504704)))];
+            tensor<fp16, [1024]> blocks_18_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(469601920)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4423_cast_fp16 = conv(bias = blocks_18_attn_value_bias_to_fp16, dilations = var_4423_dilations_0, groups = var_4423_groups_0, pad = var_4423_pad_0, pad_type = var_4423_pad_type_0, strides = var_4423_strides_0, weight = blocks_18_attn_value_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_4423_cast_fp16")];
+            tensor<int32, [16]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4426_axis_0 = const()[name = tensor<string, []>("op_4426_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_15 = split(axis = var_4426_axis_0, split_sizes = tile_54, x = var_4425_cast_fp16)[name = tensor<string, []>("op_4426_cast_fp16")];
+            tensor<int32, [4]> var_4443_perm_0 = const()[name = tensor<string, []>("op_4443_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_55 = const()[name = tensor<string, []>("tile_55"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4444_axis_0 = const()[name = tensor<string, []>("op_4444_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4443_cast_fp16 = transpose(perm = var_4443_perm_0, x = k_37_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_15 = split(axis = var_4444_axis_0, split_sizes = tile_55, x = var_4443_cast_fp16)[name = tensor<string, []>("op_4444_cast_fp16")];
+            tensor<int32, [16]> tile_56 = const()[name = tensor<string, []>("tile_56"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4461_axis_0 = const()[name = tensor<string, []>("op_4461_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_15 = split(axis = var_4461_axis_0, split_sizes = tile_56, x = var_4423_cast_fp16)[name = tensor<string, []>("op_4461_cast_fp16")];
+            tensor<string, []> aw_577_equation_0 = const()[name = tensor<string, []>("aw_577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_577_cast_fp16 = einsum(equation = aw_577_equation_0, values = (var_4444_cast_fp16_0, var_4426_cast_fp16_0))[name = tensor<string, []>("aw_577_cast_fp16")];
+            tensor<string, []> aw_579_equation_0 = const()[name = tensor<string, []>("aw_579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_579_cast_fp16 = einsum(equation = aw_579_equation_0, values = (var_4444_cast_fp16_1, var_4426_cast_fp16_1))[name = tensor<string, []>("aw_579_cast_fp16")];
+            tensor<string, []> aw_581_equation_0 = const()[name = tensor<string, []>("aw_581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_581_cast_fp16 = einsum(equation = aw_581_equation_0, values = (var_4444_cast_fp16_2, var_4426_cast_fp16_2))[name = tensor<string, []>("aw_581_cast_fp16")];
+            tensor<string, []> aw_583_equation_0 = const()[name = tensor<string, []>("aw_583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_583_cast_fp16 = einsum(equation = aw_583_equation_0, values = (var_4444_cast_fp16_3, var_4426_cast_fp16_3))[name = tensor<string, []>("aw_583_cast_fp16")];
+            tensor<string, []> aw_585_equation_0 = const()[name = tensor<string, []>("aw_585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_585_cast_fp16 = einsum(equation = aw_585_equation_0, values = (var_4444_cast_fp16_4, var_4426_cast_fp16_4))[name = tensor<string, []>("aw_585_cast_fp16")];
+            tensor<string, []> aw_587_equation_0 = const()[name = tensor<string, []>("aw_587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_587_cast_fp16 = einsum(equation = aw_587_equation_0, values = (var_4444_cast_fp16_5, var_4426_cast_fp16_5))[name = tensor<string, []>("aw_587_cast_fp16")];
+            tensor<string, []> aw_589_equation_0 = const()[name = tensor<string, []>("aw_589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_589_cast_fp16 = einsum(equation = aw_589_equation_0, values = (var_4444_cast_fp16_6, var_4426_cast_fp16_6))[name = tensor<string, []>("aw_589_cast_fp16")];
+            tensor<string, []> aw_591_equation_0 = const()[name = tensor<string, []>("aw_591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_591_cast_fp16 = einsum(equation = aw_591_equation_0, values = (var_4444_cast_fp16_7, var_4426_cast_fp16_7))[name = tensor<string, []>("aw_591_cast_fp16")];
+            tensor<string, []> aw_593_equation_0 = const()[name = tensor<string, []>("aw_593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_593_cast_fp16 = einsum(equation = aw_593_equation_0, values = (var_4444_cast_fp16_8, var_4426_cast_fp16_8))[name = tensor<string, []>("aw_593_cast_fp16")];
+            tensor<string, []> aw_595_equation_0 = const()[name = tensor<string, []>("aw_595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_595_cast_fp16 = einsum(equation = aw_595_equation_0, values = (var_4444_cast_fp16_9, var_4426_cast_fp16_9))[name = tensor<string, []>("aw_595_cast_fp16")];
+            tensor<string, []> aw_597_equation_0 = const()[name = tensor<string, []>("aw_597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_597_cast_fp16 = einsum(equation = aw_597_equation_0, values = (var_4444_cast_fp16_10, var_4426_cast_fp16_10))[name = tensor<string, []>("aw_597_cast_fp16")];
+            tensor<string, []> aw_599_equation_0 = const()[name = tensor<string, []>("aw_599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_599_cast_fp16 = einsum(equation = aw_599_equation_0, values = (var_4444_cast_fp16_11, var_4426_cast_fp16_11))[name = tensor<string, []>("aw_599_cast_fp16")];
+            tensor<string, []> aw_601_equation_0 = const()[name = tensor<string, []>("aw_601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_601_cast_fp16 = einsum(equation = aw_601_equation_0, values = (var_4444_cast_fp16_12, var_4426_cast_fp16_12))[name = tensor<string, []>("aw_601_cast_fp16")];
+            tensor<string, []> aw_603_equation_0 = const()[name = tensor<string, []>("aw_603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_603_cast_fp16 = einsum(equation = aw_603_equation_0, values = (var_4444_cast_fp16_13, var_4426_cast_fp16_13))[name = tensor<string, []>("aw_603_cast_fp16")];
+            tensor<string, []> aw_605_equation_0 = const()[name = tensor<string, []>("aw_605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_605_cast_fp16 = einsum(equation = aw_605_equation_0, values = (var_4444_cast_fp16_14, var_4426_cast_fp16_14))[name = tensor<string, []>("aw_605_cast_fp16")];
+            tensor<string, []> aw_607_equation_0 = const()[name = tensor<string, []>("aw_607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_607_cast_fp16 = einsum(equation = aw_607_equation_0, values = (var_4444_cast_fp16_15, var_4426_cast_fp16_15))[name = tensor<string, []>("aw_607_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4510_cast_fp16 = softmax(axis = var_4374, x = aw_577_cast_fp16)[name = tensor<string, []>("op_4510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4511_cast_fp16 = softmax(axis = var_4374, x = aw_579_cast_fp16)[name = tensor<string, []>("op_4511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4512_cast_fp16 = softmax(axis = var_4374, x = aw_581_cast_fp16)[name = tensor<string, []>("op_4512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4513_cast_fp16 = softmax(axis = var_4374, x = aw_583_cast_fp16)[name = tensor<string, []>("op_4513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4514_cast_fp16 = softmax(axis = var_4374, x = aw_585_cast_fp16)[name = tensor<string, []>("op_4514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4515_cast_fp16 = softmax(axis = var_4374, x = aw_587_cast_fp16)[name = tensor<string, []>("op_4515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4516_cast_fp16 = softmax(axis = var_4374, x = aw_589_cast_fp16)[name = tensor<string, []>("op_4516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4517_cast_fp16 = softmax(axis = var_4374, x = aw_591_cast_fp16)[name = tensor<string, []>("op_4517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4518_cast_fp16 = softmax(axis = var_4374, x = aw_593_cast_fp16)[name = tensor<string, []>("op_4518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4519_cast_fp16 = softmax(axis = var_4374, x = aw_595_cast_fp16)[name = tensor<string, []>("op_4519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4520_cast_fp16 = softmax(axis = var_4374, x = aw_597_cast_fp16)[name = tensor<string, []>("op_4520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4521_cast_fp16 = softmax(axis = var_4374, x = aw_599_cast_fp16)[name = tensor<string, []>("op_4521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4522_cast_fp16 = softmax(axis = var_4374, x = aw_601_cast_fp16)[name = tensor<string, []>("op_4522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4523_cast_fp16 = softmax(axis = var_4374, x = aw_603_cast_fp16)[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4524_cast_fp16 = softmax(axis = var_4374, x = aw_605_cast_fp16)[name = tensor<string, []>("op_4524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4525_cast_fp16 = softmax(axis = var_4374, x = aw_607_cast_fp16)[name = tensor<string, []>("op_4525_cast_fp16")];
+            tensor<string, []> var_4527_equation_0 = const()[name = tensor<string, []>("op_4527_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4527_cast_fp16 = einsum(equation = var_4527_equation_0, values = (var_4461_cast_fp16_0, var_4510_cast_fp16))[name = tensor<string, []>("op_4527_cast_fp16")];
+            tensor<string, []> var_4529_equation_0 = const()[name = tensor<string, []>("op_4529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4529_cast_fp16 = einsum(equation = var_4529_equation_0, values = (var_4461_cast_fp16_1, var_4511_cast_fp16))[name = tensor<string, []>("op_4529_cast_fp16")];
+            tensor<string, []> var_4531_equation_0 = const()[name = tensor<string, []>("op_4531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4531_cast_fp16 = einsum(equation = var_4531_equation_0, values = (var_4461_cast_fp16_2, var_4512_cast_fp16))[name = tensor<string, []>("op_4531_cast_fp16")];
+            tensor<string, []> var_4533_equation_0 = const()[name = tensor<string, []>("op_4533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4533_cast_fp16 = einsum(equation = var_4533_equation_0, values = (var_4461_cast_fp16_3, var_4513_cast_fp16))[name = tensor<string, []>("op_4533_cast_fp16")];
+            tensor<string, []> var_4535_equation_0 = const()[name = tensor<string, []>("op_4535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4535_cast_fp16 = einsum(equation = var_4535_equation_0, values = (var_4461_cast_fp16_4, var_4514_cast_fp16))[name = tensor<string, []>("op_4535_cast_fp16")];
+            tensor<string, []> var_4537_equation_0 = const()[name = tensor<string, []>("op_4537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4537_cast_fp16 = einsum(equation = var_4537_equation_0, values = (var_4461_cast_fp16_5, var_4515_cast_fp16))[name = tensor<string, []>("op_4537_cast_fp16")];
+            tensor<string, []> var_4539_equation_0 = const()[name = tensor<string, []>("op_4539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4539_cast_fp16 = einsum(equation = var_4539_equation_0, values = (var_4461_cast_fp16_6, var_4516_cast_fp16))[name = tensor<string, []>("op_4539_cast_fp16")];
+            tensor<string, []> var_4541_equation_0 = const()[name = tensor<string, []>("op_4541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4541_cast_fp16 = einsum(equation = var_4541_equation_0, values = (var_4461_cast_fp16_7, var_4517_cast_fp16))[name = tensor<string, []>("op_4541_cast_fp16")];
+            tensor<string, []> var_4543_equation_0 = const()[name = tensor<string, []>("op_4543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4543_cast_fp16 = einsum(equation = var_4543_equation_0, values = (var_4461_cast_fp16_8, var_4518_cast_fp16))[name = tensor<string, []>("op_4543_cast_fp16")];
+            tensor<string, []> var_4545_equation_0 = const()[name = tensor<string, []>("op_4545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4545_cast_fp16 = einsum(equation = var_4545_equation_0, values = (var_4461_cast_fp16_9, var_4519_cast_fp16))[name = tensor<string, []>("op_4545_cast_fp16")];
+            tensor<string, []> var_4547_equation_0 = const()[name = tensor<string, []>("op_4547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4547_cast_fp16 = einsum(equation = var_4547_equation_0, values = (var_4461_cast_fp16_10, var_4520_cast_fp16))[name = tensor<string, []>("op_4547_cast_fp16")];
+            tensor<string, []> var_4549_equation_0 = const()[name = tensor<string, []>("op_4549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4549_cast_fp16 = einsum(equation = var_4549_equation_0, values = (var_4461_cast_fp16_11, var_4521_cast_fp16))[name = tensor<string, []>("op_4549_cast_fp16")];
+            tensor<string, []> var_4551_equation_0 = const()[name = tensor<string, []>("op_4551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4551_cast_fp16 = einsum(equation = var_4551_equation_0, values = (var_4461_cast_fp16_12, var_4522_cast_fp16))[name = tensor<string, []>("op_4551_cast_fp16")];
+            tensor<string, []> var_4553_equation_0 = const()[name = tensor<string, []>("op_4553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4553_cast_fp16 = einsum(equation = var_4553_equation_0, values = (var_4461_cast_fp16_13, var_4523_cast_fp16))[name = tensor<string, []>("op_4553_cast_fp16")];
+            tensor<string, []> var_4555_equation_0 = const()[name = tensor<string, []>("op_4555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4555_cast_fp16 = einsum(equation = var_4555_equation_0, values = (var_4461_cast_fp16_14, var_4524_cast_fp16))[name = tensor<string, []>("op_4555_cast_fp16")];
+            tensor<string, []> var_4557_equation_0 = const()[name = tensor<string, []>("op_4557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4557_cast_fp16 = einsum(equation = var_4557_equation_0, values = (var_4461_cast_fp16_15, var_4525_cast_fp16))[name = tensor<string, []>("op_4557_cast_fp16")];
+            tensor<bool, []> input_185_interleave_0 = const()[name = tensor<string, []>("input_185_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_185_cast_fp16 = concat(axis = var_4374, interleave = input_185_interleave_0, values = (var_4527_cast_fp16, var_4529_cast_fp16, var_4531_cast_fp16, var_4533_cast_fp16, var_4535_cast_fp16, var_4537_cast_fp16, var_4539_cast_fp16, var_4541_cast_fp16, var_4543_cast_fp16, var_4545_cast_fp16, var_4547_cast_fp16, var_4549_cast_fp16, var_4551_cast_fp16, var_4553_cast_fp16, var_4555_cast_fp16, var_4557_cast_fp16))[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<string, []> var_4566_pad_type_0 = const()[name = tensor<string, []>("op_4566_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4566_strides_0 = const()[name = tensor<string, []>("op_4566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4566_pad_0 = const()[name = tensor<string, []>("op_4566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4566_dilations_0 = const()[name = tensor<string, []>("op_4566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4566_groups_0 = const()[name = tensor<string, []>("op_4566_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(469604032)))];
+            tensor<fp16, [1024]> blocks_18_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471701248)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4566_cast_fp16 = conv(bias = blocks_18_attn_out_bias_to_fp16, dilations = var_4566_dilations_0, groups = var_4566_groups_0, pad = var_4566_pad_0, pad_type = var_4566_pad_type_0, strides = var_4566_strides_0, weight = blocks_18_attn_out_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("op_4566_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = var_4566_cast_fp16)[name = tensor<string, []>("inputs_75_cast_fp16")];
+            tensor<int32, [1]> input_187_axes_0 = const()[name = tensor<string, []>("input_187_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_187_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471703360)))];
+            tensor<fp16, [1024]> input_187_beta_0_to_fp16 = const()[name = tensor<string, []>("input_187_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471705472)))];
+            tensor<fp16, []> var_4576_to_fp16 = const()[name = tensor<string, []>("op_4576_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_187_cast_fp16 = layer_norm(axes = input_187_axes_0, beta = input_187_beta_0_to_fp16, epsilon = var_4576_to_fp16, gamma = input_187_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
+            tensor<string, []> input_189_pad_type_0 = const()[name = tensor<string, []>("input_189_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_189_strides_0 = const()[name = tensor<string, []>("input_189_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_189_pad_0 = const()[name = tensor<string, []>("input_189_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_189_dilations_0 = const()[name = tensor<string, []>("input_189_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_189_groups_0 = const()[name = tensor<string, []>("input_189_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_18_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471707584)))];
+            tensor<fp16, [4096]> blocks_18_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(480096256)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_189_cast_fp16 = conv(bias = blocks_18_mlp_0_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = blocks_18_mlp_0_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
+            tensor<string, []> var_4602_pad_type_0 = const()[name = tensor<string, []>("op_4602_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4602_strides_0 = const()[name = tensor<string, []>("op_4602_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4602_pad_0 = const()[name = tensor<string, []>("op_4602_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4602_dilations_0 = const()[name = tensor<string, []>("op_4602_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4602_groups_0 = const()[name = tensor<string, []>("op_4602_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_18_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(480104512)))];
+            tensor<fp16, [1024]> blocks_18_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488493184)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4602_cast_fp16 = conv(bias = blocks_18_mlp_2_bias_to_fp16, dilations = var_4602_dilations_0, groups = var_4602_groups_0, pad = var_4602_pad_0, pad_type = var_4602_pad_type_0, strides = var_4602_strides_0, weight = blocks_18_mlp_2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("op_4602_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = var_4602_cast_fp16)[name = tensor<string, []>("inputs_77_cast_fp16")];
+            tensor<int32, []> var_4611 = const()[name = tensor<string, []>("op_4611"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_193_axes_0 = const()[name = tensor<string, []>("input_193_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_193_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_193_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488495296)))];
+            tensor<fp16, [1024]> input_193_beta_0_to_fp16 = const()[name = tensor<string, []>("input_193_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488497408)))];
+            tensor<fp16, []> var_4627_to_fp16 = const()[name = tensor<string, []>("op_4627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_193_cast_fp16 = layer_norm(axes = input_193_axes_0, beta = input_193_beta_0_to_fp16, epsilon = var_4627_to_fp16, gamma = input_193_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
+            tensor<string, []> q_39_pad_type_0 = const()[name = tensor<string, []>("q_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_39_strides_0 = const()[name = tensor<string, []>("q_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_39_pad_0 = const()[name = tensor<string, []>("q_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_39_dilations_0 = const()[name = tensor<string, []>("q_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_39_groups_0 = const()[name = tensor<string, []>("q_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4662_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4662_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488499520)))];
+            tensor<fp16, [1024]> var_4662_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4662_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490596736)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4662_cast_fp16 = conv(bias = var_4662_bias_0_to_fp16, dilations = q_39_dilations_0, groups = q_39_groups_0, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = q_39_strides_0, weight = var_4662_weight_0_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_4662_cast_fp16")];
+            tensor<string, []> k_39_pad_type_0 = const()[name = tensor<string, []>("k_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_39_strides_0 = const()[name = tensor<string, []>("k_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_39_pad_0 = const()[name = tensor<string, []>("k_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_39_dilations_0 = const()[name = tensor<string, []>("k_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_39_groups_0 = const()[name = tensor<string, []>("k_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490598848)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_39_cast_fp16 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = blocks_19_attn_key_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("k_39_cast_fp16")];
+            tensor<string, []> var_4660_pad_type_0 = const()[name = tensor<string, []>("op_4660_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4660_strides_0 = const()[name = tensor<string, []>("op_4660_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4660_pad_0 = const()[name = tensor<string, []>("op_4660_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4660_dilations_0 = const()[name = tensor<string, []>("op_4660_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4660_groups_0 = const()[name = tensor<string, []>("op_4660_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(492696064)))];
+            tensor<fp16, [1024]> blocks_19_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(494793280)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4660_cast_fp16 = conv(bias = blocks_19_attn_value_bias_to_fp16, dilations = var_4660_dilations_0, groups = var_4660_groups_0, pad = var_4660_pad_0, pad_type = var_4660_pad_type_0, strides = var_4660_strides_0, weight = blocks_19_attn_value_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_4660_cast_fp16")];
+            tensor<int32, [16]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4663_axis_0 = const()[name = tensor<string, []>("op_4663_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_15 = split(axis = var_4663_axis_0, split_sizes = tile_57, x = var_4662_cast_fp16)[name = tensor<string, []>("op_4663_cast_fp16")];
+            tensor<int32, [4]> var_4680_perm_0 = const()[name = tensor<string, []>("op_4680_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4681_axis_0 = const()[name = tensor<string, []>("op_4681_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4680_cast_fp16 = transpose(perm = var_4680_perm_0, x = k_39_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_15 = split(axis = var_4681_axis_0, split_sizes = tile_58, x = var_4680_cast_fp16)[name = tensor<string, []>("op_4681_cast_fp16")];
+            tensor<int32, [16]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4698_axis_0 = const()[name = tensor<string, []>("op_4698_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_15 = split(axis = var_4698_axis_0, split_sizes = tile_59, x = var_4660_cast_fp16)[name = tensor<string, []>("op_4698_cast_fp16")];
+            tensor<string, []> aw_609_equation_0 = const()[name = tensor<string, []>("aw_609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_609_cast_fp16 = einsum(equation = aw_609_equation_0, values = (var_4681_cast_fp16_0, var_4663_cast_fp16_0))[name = tensor<string, []>("aw_609_cast_fp16")];
+            tensor<string, []> aw_611_equation_0 = const()[name = tensor<string, []>("aw_611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_611_cast_fp16 = einsum(equation = aw_611_equation_0, values = (var_4681_cast_fp16_1, var_4663_cast_fp16_1))[name = tensor<string, []>("aw_611_cast_fp16")];
+            tensor<string, []> aw_613_equation_0 = const()[name = tensor<string, []>("aw_613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_613_cast_fp16 = einsum(equation = aw_613_equation_0, values = (var_4681_cast_fp16_2, var_4663_cast_fp16_2))[name = tensor<string, []>("aw_613_cast_fp16")];
+            tensor<string, []> aw_615_equation_0 = const()[name = tensor<string, []>("aw_615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_615_cast_fp16 = einsum(equation = aw_615_equation_0, values = (var_4681_cast_fp16_3, var_4663_cast_fp16_3))[name = tensor<string, []>("aw_615_cast_fp16")];
+            tensor<string, []> aw_617_equation_0 = const()[name = tensor<string, []>("aw_617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_617_cast_fp16 = einsum(equation = aw_617_equation_0, values = (var_4681_cast_fp16_4, var_4663_cast_fp16_4))[name = tensor<string, []>("aw_617_cast_fp16")];
+            tensor<string, []> aw_619_equation_0 = const()[name = tensor<string, []>("aw_619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_619_cast_fp16 = einsum(equation = aw_619_equation_0, values = (var_4681_cast_fp16_5, var_4663_cast_fp16_5))[name = tensor<string, []>("aw_619_cast_fp16")];
+            tensor<string, []> aw_621_equation_0 = const()[name = tensor<string, []>("aw_621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_621_cast_fp16 = einsum(equation = aw_621_equation_0, values = (var_4681_cast_fp16_6, var_4663_cast_fp16_6))[name = tensor<string, []>("aw_621_cast_fp16")];
+            tensor<string, []> aw_623_equation_0 = const()[name = tensor<string, []>("aw_623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_623_cast_fp16 = einsum(equation = aw_623_equation_0, values = (var_4681_cast_fp16_7, var_4663_cast_fp16_7))[name = tensor<string, []>("aw_623_cast_fp16")];
+            tensor<string, []> aw_625_equation_0 = const()[name = tensor<string, []>("aw_625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_625_cast_fp16 = einsum(equation = aw_625_equation_0, values = (var_4681_cast_fp16_8, var_4663_cast_fp16_8))[name = tensor<string, []>("aw_625_cast_fp16")];
+            tensor<string, []> aw_627_equation_0 = const()[name = tensor<string, []>("aw_627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_627_cast_fp16 = einsum(equation = aw_627_equation_0, values = (var_4681_cast_fp16_9, var_4663_cast_fp16_9))[name = tensor<string, []>("aw_627_cast_fp16")];
+            tensor<string, []> aw_629_equation_0 = const()[name = tensor<string, []>("aw_629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_629_cast_fp16 = einsum(equation = aw_629_equation_0, values = (var_4681_cast_fp16_10, var_4663_cast_fp16_10))[name = tensor<string, []>("aw_629_cast_fp16")];
+            tensor<string, []> aw_631_equation_0 = const()[name = tensor<string, []>("aw_631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_631_cast_fp16 = einsum(equation = aw_631_equation_0, values = (var_4681_cast_fp16_11, var_4663_cast_fp16_11))[name = tensor<string, []>("aw_631_cast_fp16")];
+            tensor<string, []> aw_633_equation_0 = const()[name = tensor<string, []>("aw_633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_633_cast_fp16 = einsum(equation = aw_633_equation_0, values = (var_4681_cast_fp16_12, var_4663_cast_fp16_12))[name = tensor<string, []>("aw_633_cast_fp16")];
+            tensor<string, []> aw_635_equation_0 = const()[name = tensor<string, []>("aw_635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_635_cast_fp16 = einsum(equation = aw_635_equation_0, values = (var_4681_cast_fp16_13, var_4663_cast_fp16_13))[name = tensor<string, []>("aw_635_cast_fp16")];
+            tensor<string, []> aw_637_equation_0 = const()[name = tensor<string, []>("aw_637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_637_cast_fp16 = einsum(equation = aw_637_equation_0, values = (var_4681_cast_fp16_14, var_4663_cast_fp16_14))[name = tensor<string, []>("aw_637_cast_fp16")];
+            tensor<string, []> aw_639_equation_0 = const()[name = tensor<string, []>("aw_639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_639_cast_fp16 = einsum(equation = aw_639_equation_0, values = (var_4681_cast_fp16_15, var_4663_cast_fp16_15))[name = tensor<string, []>("aw_639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4747_cast_fp16 = softmax(axis = var_4611, x = aw_609_cast_fp16)[name = tensor<string, []>("op_4747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4748_cast_fp16 = softmax(axis = var_4611, x = aw_611_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4749_cast_fp16 = softmax(axis = var_4611, x = aw_613_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4750_cast_fp16 = softmax(axis = var_4611, x = aw_615_cast_fp16)[name = tensor<string, []>("op_4750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4751_cast_fp16 = softmax(axis = var_4611, x = aw_617_cast_fp16)[name = tensor<string, []>("op_4751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4752_cast_fp16 = softmax(axis = var_4611, x = aw_619_cast_fp16)[name = tensor<string, []>("op_4752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4753_cast_fp16 = softmax(axis = var_4611, x = aw_621_cast_fp16)[name = tensor<string, []>("op_4753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4754_cast_fp16 = softmax(axis = var_4611, x = aw_623_cast_fp16)[name = tensor<string, []>("op_4754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4755_cast_fp16 = softmax(axis = var_4611, x = aw_625_cast_fp16)[name = tensor<string, []>("op_4755_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4756_cast_fp16 = softmax(axis = var_4611, x = aw_627_cast_fp16)[name = tensor<string, []>("op_4756_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4757_cast_fp16 = softmax(axis = var_4611, x = aw_629_cast_fp16)[name = tensor<string, []>("op_4757_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4758_cast_fp16 = softmax(axis = var_4611, x = aw_631_cast_fp16)[name = tensor<string, []>("op_4758_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4759_cast_fp16 = softmax(axis = var_4611, x = aw_633_cast_fp16)[name = tensor<string, []>("op_4759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4760_cast_fp16 = softmax(axis = var_4611, x = aw_635_cast_fp16)[name = tensor<string, []>("op_4760_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4761_cast_fp16 = softmax(axis = var_4611, x = aw_637_cast_fp16)[name = tensor<string, []>("op_4761_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4762_cast_fp16 = softmax(axis = var_4611, x = aw_639_cast_fp16)[name = tensor<string, []>("op_4762_cast_fp16")];
+            tensor<string, []> var_4764_equation_0 = const()[name = tensor<string, []>("op_4764_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4764_cast_fp16 = einsum(equation = var_4764_equation_0, values = (var_4698_cast_fp16_0, var_4747_cast_fp16))[name = tensor<string, []>("op_4764_cast_fp16")];
+            tensor<string, []> var_4766_equation_0 = const()[name = tensor<string, []>("op_4766_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4766_cast_fp16 = einsum(equation = var_4766_equation_0, values = (var_4698_cast_fp16_1, var_4748_cast_fp16))[name = tensor<string, []>("op_4766_cast_fp16")];
+            tensor<string, []> var_4768_equation_0 = const()[name = tensor<string, []>("op_4768_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4768_cast_fp16 = einsum(equation = var_4768_equation_0, values = (var_4698_cast_fp16_2, var_4749_cast_fp16))[name = tensor<string, []>("op_4768_cast_fp16")];
+            tensor<string, []> var_4770_equation_0 = const()[name = tensor<string, []>("op_4770_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4770_cast_fp16 = einsum(equation = var_4770_equation_0, values = (var_4698_cast_fp16_3, var_4750_cast_fp16))[name = tensor<string, []>("op_4770_cast_fp16")];
+            tensor<string, []> var_4772_equation_0 = const()[name = tensor<string, []>("op_4772_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4772_cast_fp16 = einsum(equation = var_4772_equation_0, values = (var_4698_cast_fp16_4, var_4751_cast_fp16))[name = tensor<string, []>("op_4772_cast_fp16")];
+            tensor<string, []> var_4774_equation_0 = const()[name = tensor<string, []>("op_4774_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4774_cast_fp16 = einsum(equation = var_4774_equation_0, values = (var_4698_cast_fp16_5, var_4752_cast_fp16))[name = tensor<string, []>("op_4774_cast_fp16")];
+            tensor<string, []> var_4776_equation_0 = const()[name = tensor<string, []>("op_4776_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4776_cast_fp16 = einsum(equation = var_4776_equation_0, values = (var_4698_cast_fp16_6, var_4753_cast_fp16))[name = tensor<string, []>("op_4776_cast_fp16")];
+            tensor<string, []> var_4778_equation_0 = const()[name = tensor<string, []>("op_4778_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4778_cast_fp16 = einsum(equation = var_4778_equation_0, values = (var_4698_cast_fp16_7, var_4754_cast_fp16))[name = tensor<string, []>("op_4778_cast_fp16")];
+            tensor<string, []> var_4780_equation_0 = const()[name = tensor<string, []>("op_4780_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4780_cast_fp16 = einsum(equation = var_4780_equation_0, values = (var_4698_cast_fp16_8, var_4755_cast_fp16))[name = tensor<string, []>("op_4780_cast_fp16")];
+            tensor<string, []> var_4782_equation_0 = const()[name = tensor<string, []>("op_4782_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4782_cast_fp16 = einsum(equation = var_4782_equation_0, values = (var_4698_cast_fp16_9, var_4756_cast_fp16))[name = tensor<string, []>("op_4782_cast_fp16")];
+            tensor<string, []> var_4784_equation_0 = const()[name = tensor<string, []>("op_4784_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4784_cast_fp16 = einsum(equation = var_4784_equation_0, values = (var_4698_cast_fp16_10, var_4757_cast_fp16))[name = tensor<string, []>("op_4784_cast_fp16")];
+            tensor<string, []> var_4786_equation_0 = const()[name = tensor<string, []>("op_4786_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4786_cast_fp16 = einsum(equation = var_4786_equation_0, values = (var_4698_cast_fp16_11, var_4758_cast_fp16))[name = tensor<string, []>("op_4786_cast_fp16")];
+            tensor<string, []> var_4788_equation_0 = const()[name = tensor<string, []>("op_4788_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4788_cast_fp16 = einsum(equation = var_4788_equation_0, values = (var_4698_cast_fp16_12, var_4759_cast_fp16))[name = tensor<string, []>("op_4788_cast_fp16")];
+            tensor<string, []> var_4790_equation_0 = const()[name = tensor<string, []>("op_4790_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4790_cast_fp16 = einsum(equation = var_4790_equation_0, values = (var_4698_cast_fp16_13, var_4760_cast_fp16))[name = tensor<string, []>("op_4790_cast_fp16")];
+            tensor<string, []> var_4792_equation_0 = const()[name = tensor<string, []>("op_4792_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16 = einsum(equation = var_4792_equation_0, values = (var_4698_cast_fp16_14, var_4761_cast_fp16))[name = tensor<string, []>("op_4792_cast_fp16")];
+            tensor<string, []> var_4794_equation_0 = const()[name = tensor<string, []>("op_4794_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4794_cast_fp16 = einsum(equation = var_4794_equation_0, values = (var_4698_cast_fp16_15, var_4762_cast_fp16))[name = tensor<string, []>("op_4794_cast_fp16")];
+            tensor<bool, []> input_195_interleave_0 = const()[name = tensor<string, []>("input_195_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_195_cast_fp16 = concat(axis = var_4611, interleave = input_195_interleave_0, values = (var_4764_cast_fp16, var_4766_cast_fp16, var_4768_cast_fp16, var_4770_cast_fp16, var_4772_cast_fp16, var_4774_cast_fp16, var_4776_cast_fp16, var_4778_cast_fp16, var_4780_cast_fp16, var_4782_cast_fp16, var_4784_cast_fp16, var_4786_cast_fp16, var_4788_cast_fp16, var_4790_cast_fp16, var_4792_cast_fp16, var_4794_cast_fp16))[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<string, []> var_4803_pad_type_0 = const()[name = tensor<string, []>("op_4803_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4803_strides_0 = const()[name = tensor<string, []>("op_4803_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4803_pad_0 = const()[name = tensor<string, []>("op_4803_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4803_dilations_0 = const()[name = tensor<string, []>("op_4803_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4803_groups_0 = const()[name = tensor<string, []>("op_4803_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(494795392)))];
+            tensor<fp16, [1024]> blocks_19_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496892608)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4803_cast_fp16 = conv(bias = blocks_19_attn_out_bias_to_fp16, dilations = var_4803_dilations_0, groups = var_4803_groups_0, pad = var_4803_pad_0, pad_type = var_4803_pad_type_0, strides = var_4803_strides_0, weight = blocks_19_attn_out_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("op_4803_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = var_4803_cast_fp16)[name = tensor<string, []>("inputs_79_cast_fp16")];
+            tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_197_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_197_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496894720)))];
+            tensor<fp16, [1024]> input_197_beta_0_to_fp16 = const()[name = tensor<string, []>("input_197_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496896832)))];
+            tensor<fp16, []> var_4813_to_fp16 = const()[name = tensor<string, []>("op_4813_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = input_197_beta_0_to_fp16, epsilon = var_4813_to_fp16, gamma = input_197_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_19_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496898944)))];
+            tensor<fp16, [4096]> blocks_19_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(505287616)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_199_cast_fp16 = conv(bias = blocks_19_mlp_0_bias_to_fp16, dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = blocks_19_mlp_0_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<string, []> var_4839_pad_type_0 = const()[name = tensor<string, []>("op_4839_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4839_strides_0 = const()[name = tensor<string, []>("op_4839_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4839_pad_0 = const()[name = tensor<string, []>("op_4839_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4839_dilations_0 = const()[name = tensor<string, []>("op_4839_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4839_groups_0 = const()[name = tensor<string, []>("op_4839_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_19_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(505295872)))];
+            tensor<fp16, [1024]> blocks_19_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513684544)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4839_cast_fp16 = conv(bias = blocks_19_mlp_2_bias_to_fp16, dilations = var_4839_dilations_0, groups = var_4839_groups_0, pad = var_4839_pad_0, pad_type = var_4839_pad_type_0, strides = var_4839_strides_0, weight = blocks_19_mlp_2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("op_4839_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_4839_cast_fp16)[name = tensor<string, []>("inputs_81_cast_fp16")];
+            tensor<int32, []> var_4848 = const()[name = tensor<string, []>("op_4848"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_203_axes_0 = const()[name = tensor<string, []>("input_203_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_203_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513686656)))];
+            tensor<fp16, [1024]> input_203_beta_0_to_fp16 = const()[name = tensor<string, []>("input_203_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513688768)))];
+            tensor<fp16, []> var_4864_to_fp16 = const()[name = tensor<string, []>("op_4864_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_203_cast_fp16 = layer_norm(axes = input_203_axes_0, beta = input_203_beta_0_to_fp16, epsilon = var_4864_to_fp16, gamma = input_203_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<string, []> q_41_pad_type_0 = const()[name = tensor<string, []>("q_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_41_strides_0 = const()[name = tensor<string, []>("q_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_41_pad_0 = const()[name = tensor<string, []>("q_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_41_dilations_0 = const()[name = tensor<string, []>("q_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_41_groups_0 = const()[name = tensor<string, []>("q_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4899_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4899_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513690880)))];
+            tensor<fp16, [1024]> var_4899_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4899_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(515788096)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4899_cast_fp16 = conv(bias = var_4899_bias_0_to_fp16, dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = var_4899_weight_0_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_4899_cast_fp16")];
+            tensor<string, []> k_41_pad_type_0 = const()[name = tensor<string, []>("k_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_41_strides_0 = const()[name = tensor<string, []>("k_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_41_pad_0 = const()[name = tensor<string, []>("k_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_41_dilations_0 = const()[name = tensor<string, []>("k_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_41_groups_0 = const()[name = tensor<string, []>("k_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(515790208)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_41_cast_fp16 = conv(dilations = k_41_dilations_0, groups = k_41_groups_0, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = k_41_strides_0, weight = blocks_20_attn_key_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
+            tensor<string, []> var_4897_pad_type_0 = const()[name = tensor<string, []>("op_4897_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4897_strides_0 = const()[name = tensor<string, []>("op_4897_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4897_pad_0 = const()[name = tensor<string, []>("op_4897_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4897_dilations_0 = const()[name = tensor<string, []>("op_4897_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4897_groups_0 = const()[name = tensor<string, []>("op_4897_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(517887424)))];
+            tensor<fp16, [1024]> blocks_20_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(519984640)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4897_cast_fp16 = conv(bias = blocks_20_attn_value_bias_to_fp16, dilations = var_4897_dilations_0, groups = var_4897_groups_0, pad = var_4897_pad_0, pad_type = var_4897_pad_type_0, strides = var_4897_strides_0, weight = blocks_20_attn_value_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_4897_cast_fp16")];
+            tensor<int32, [16]> tile_60 = const()[name = tensor<string, []>("tile_60"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4900_axis_0 = const()[name = tensor<string, []>("op_4900_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_15 = split(axis = var_4900_axis_0, split_sizes = tile_60, x = var_4899_cast_fp16)[name = tensor<string, []>("op_4900_cast_fp16")];
+            tensor<int32, [4]> var_4917_perm_0 = const()[name = tensor<string, []>("op_4917_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_61 = const()[name = tensor<string, []>("tile_61"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4918_axis_0 = const()[name = tensor<string, []>("op_4918_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4917_cast_fp16 = transpose(perm = var_4917_perm_0, x = k_41_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_15 = split(axis = var_4918_axis_0, split_sizes = tile_61, x = var_4917_cast_fp16)[name = tensor<string, []>("op_4918_cast_fp16")];
+            tensor<int32, [16]> tile_62 = const()[name = tensor<string, []>("tile_62"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4935_axis_0 = const()[name = tensor<string, []>("op_4935_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_15 = split(axis = var_4935_axis_0, split_sizes = tile_62, x = var_4897_cast_fp16)[name = tensor<string, []>("op_4935_cast_fp16")];
+            tensor<string, []> aw_641_equation_0 = const()[name = tensor<string, []>("aw_641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_641_cast_fp16 = einsum(equation = aw_641_equation_0, values = (var_4918_cast_fp16_0, var_4900_cast_fp16_0))[name = tensor<string, []>("aw_641_cast_fp16")];
+            tensor<string, []> aw_643_equation_0 = const()[name = tensor<string, []>("aw_643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_643_cast_fp16 = einsum(equation = aw_643_equation_0, values = (var_4918_cast_fp16_1, var_4900_cast_fp16_1))[name = tensor<string, []>("aw_643_cast_fp16")];
+            tensor<string, []> aw_645_equation_0 = const()[name = tensor<string, []>("aw_645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_645_cast_fp16 = einsum(equation = aw_645_equation_0, values = (var_4918_cast_fp16_2, var_4900_cast_fp16_2))[name = tensor<string, []>("aw_645_cast_fp16")];
+            tensor<string, []> aw_647_equation_0 = const()[name = tensor<string, []>("aw_647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_647_cast_fp16 = einsum(equation = aw_647_equation_0, values = (var_4918_cast_fp16_3, var_4900_cast_fp16_3))[name = tensor<string, []>("aw_647_cast_fp16")];
+            tensor<string, []> aw_649_equation_0 = const()[name = tensor<string, []>("aw_649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_649_cast_fp16 = einsum(equation = aw_649_equation_0, values = (var_4918_cast_fp16_4, var_4900_cast_fp16_4))[name = tensor<string, []>("aw_649_cast_fp16")];
+            tensor<string, []> aw_651_equation_0 = const()[name = tensor<string, []>("aw_651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_651_cast_fp16 = einsum(equation = aw_651_equation_0, values = (var_4918_cast_fp16_5, var_4900_cast_fp16_5))[name = tensor<string, []>("aw_651_cast_fp16")];
+            tensor<string, []> aw_653_equation_0 = const()[name = tensor<string, []>("aw_653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_653_cast_fp16 = einsum(equation = aw_653_equation_0, values = (var_4918_cast_fp16_6, var_4900_cast_fp16_6))[name = tensor<string, []>("aw_653_cast_fp16")];
+            tensor<string, []> aw_655_equation_0 = const()[name = tensor<string, []>("aw_655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_655_cast_fp16 = einsum(equation = aw_655_equation_0, values = (var_4918_cast_fp16_7, var_4900_cast_fp16_7))[name = tensor<string, []>("aw_655_cast_fp16")];
+            tensor<string, []> aw_657_equation_0 = const()[name = tensor<string, []>("aw_657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_657_cast_fp16 = einsum(equation = aw_657_equation_0, values = (var_4918_cast_fp16_8, var_4900_cast_fp16_8))[name = tensor<string, []>("aw_657_cast_fp16")];
+            tensor<string, []> aw_659_equation_0 = const()[name = tensor<string, []>("aw_659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_659_cast_fp16 = einsum(equation = aw_659_equation_0, values = (var_4918_cast_fp16_9, var_4900_cast_fp16_9))[name = tensor<string, []>("aw_659_cast_fp16")];
+            tensor<string, []> aw_661_equation_0 = const()[name = tensor<string, []>("aw_661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_661_cast_fp16 = einsum(equation = aw_661_equation_0, values = (var_4918_cast_fp16_10, var_4900_cast_fp16_10))[name = tensor<string, []>("aw_661_cast_fp16")];
+            tensor<string, []> aw_663_equation_0 = const()[name = tensor<string, []>("aw_663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_663_cast_fp16 = einsum(equation = aw_663_equation_0, values = (var_4918_cast_fp16_11, var_4900_cast_fp16_11))[name = tensor<string, []>("aw_663_cast_fp16")];
+            tensor<string, []> aw_665_equation_0 = const()[name = tensor<string, []>("aw_665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_665_cast_fp16 = einsum(equation = aw_665_equation_0, values = (var_4918_cast_fp16_12, var_4900_cast_fp16_12))[name = tensor<string, []>("aw_665_cast_fp16")];
+            tensor<string, []> aw_667_equation_0 = const()[name = tensor<string, []>("aw_667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_667_cast_fp16 = einsum(equation = aw_667_equation_0, values = (var_4918_cast_fp16_13, var_4900_cast_fp16_13))[name = tensor<string, []>("aw_667_cast_fp16")];
+            tensor<string, []> aw_669_equation_0 = const()[name = tensor<string, []>("aw_669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_669_cast_fp16 = einsum(equation = aw_669_equation_0, values = (var_4918_cast_fp16_14, var_4900_cast_fp16_14))[name = tensor<string, []>("aw_669_cast_fp16")];
+            tensor<string, []> aw_671_equation_0 = const()[name = tensor<string, []>("aw_671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_671_cast_fp16 = einsum(equation = aw_671_equation_0, values = (var_4918_cast_fp16_15, var_4900_cast_fp16_15))[name = tensor<string, []>("aw_671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4984_cast_fp16 = softmax(axis = var_4848, x = aw_641_cast_fp16)[name = tensor<string, []>("op_4984_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4985_cast_fp16 = softmax(axis = var_4848, x = aw_643_cast_fp16)[name = tensor<string, []>("op_4985_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4986_cast_fp16 = softmax(axis = var_4848, x = aw_645_cast_fp16)[name = tensor<string, []>("op_4986_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4987_cast_fp16 = softmax(axis = var_4848, x = aw_647_cast_fp16)[name = tensor<string, []>("op_4987_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4988_cast_fp16 = softmax(axis = var_4848, x = aw_649_cast_fp16)[name = tensor<string, []>("op_4988_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4989_cast_fp16 = softmax(axis = var_4848, x = aw_651_cast_fp16)[name = tensor<string, []>("op_4989_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4990_cast_fp16 = softmax(axis = var_4848, x = aw_653_cast_fp16)[name = tensor<string, []>("op_4990_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4991_cast_fp16 = softmax(axis = var_4848, x = aw_655_cast_fp16)[name = tensor<string, []>("op_4991_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4992_cast_fp16 = softmax(axis = var_4848, x = aw_657_cast_fp16)[name = tensor<string, []>("op_4992_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4993_cast_fp16 = softmax(axis = var_4848, x = aw_659_cast_fp16)[name = tensor<string, []>("op_4993_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4994_cast_fp16 = softmax(axis = var_4848, x = aw_661_cast_fp16)[name = tensor<string, []>("op_4994_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4995_cast_fp16 = softmax(axis = var_4848, x = aw_663_cast_fp16)[name = tensor<string, []>("op_4995_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4996_cast_fp16 = softmax(axis = var_4848, x = aw_665_cast_fp16)[name = tensor<string, []>("op_4996_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4997_cast_fp16 = softmax(axis = var_4848, x = aw_667_cast_fp16)[name = tensor<string, []>("op_4997_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4998_cast_fp16 = softmax(axis = var_4848, x = aw_669_cast_fp16)[name = tensor<string, []>("op_4998_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4999_cast_fp16 = softmax(axis = var_4848, x = aw_671_cast_fp16)[name = tensor<string, []>("op_4999_cast_fp16")];
+            tensor<string, []> var_5001_equation_0 = const()[name = tensor<string, []>("op_5001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5001_cast_fp16 = einsum(equation = var_5001_equation_0, values = (var_4935_cast_fp16_0, var_4984_cast_fp16))[name = tensor<string, []>("op_5001_cast_fp16")];
+            tensor<string, []> var_5003_equation_0 = const()[name = tensor<string, []>("op_5003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5003_cast_fp16 = einsum(equation = var_5003_equation_0, values = (var_4935_cast_fp16_1, var_4985_cast_fp16))[name = tensor<string, []>("op_5003_cast_fp16")];
+            tensor<string, []> var_5005_equation_0 = const()[name = tensor<string, []>("op_5005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5005_cast_fp16 = einsum(equation = var_5005_equation_0, values = (var_4935_cast_fp16_2, var_4986_cast_fp16))[name = tensor<string, []>("op_5005_cast_fp16")];
+            tensor<string, []> var_5007_equation_0 = const()[name = tensor<string, []>("op_5007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5007_cast_fp16 = einsum(equation = var_5007_equation_0, values = (var_4935_cast_fp16_3, var_4987_cast_fp16))[name = tensor<string, []>("op_5007_cast_fp16")];
+            tensor<string, []> var_5009_equation_0 = const()[name = tensor<string, []>("op_5009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5009_cast_fp16 = einsum(equation = var_5009_equation_0, values = (var_4935_cast_fp16_4, var_4988_cast_fp16))[name = tensor<string, []>("op_5009_cast_fp16")];
+            tensor<string, []> var_5011_equation_0 = const()[name = tensor<string, []>("op_5011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5011_cast_fp16 = einsum(equation = var_5011_equation_0, values = (var_4935_cast_fp16_5, var_4989_cast_fp16))[name = tensor<string, []>("op_5011_cast_fp16")];
+            tensor<string, []> var_5013_equation_0 = const()[name = tensor<string, []>("op_5013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5013_cast_fp16 = einsum(equation = var_5013_equation_0, values = (var_4935_cast_fp16_6, var_4990_cast_fp16))[name = tensor<string, []>("op_5013_cast_fp16")];
+            tensor<string, []> var_5015_equation_0 = const()[name = tensor<string, []>("op_5015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5015_cast_fp16 = einsum(equation = var_5015_equation_0, values = (var_4935_cast_fp16_7, var_4991_cast_fp16))[name = tensor<string, []>("op_5015_cast_fp16")];
+            tensor<string, []> var_5017_equation_0 = const()[name = tensor<string, []>("op_5017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5017_cast_fp16 = einsum(equation = var_5017_equation_0, values = (var_4935_cast_fp16_8, var_4992_cast_fp16))[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<string, []> var_5019_equation_0 = const()[name = tensor<string, []>("op_5019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5019_cast_fp16 = einsum(equation = var_5019_equation_0, values = (var_4935_cast_fp16_9, var_4993_cast_fp16))[name = tensor<string, []>("op_5019_cast_fp16")];
+            tensor<string, []> var_5021_equation_0 = const()[name = tensor<string, []>("op_5021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5021_cast_fp16 = einsum(equation = var_5021_equation_0, values = (var_4935_cast_fp16_10, var_4994_cast_fp16))[name = tensor<string, []>("op_5021_cast_fp16")];
+            tensor<string, []> var_5023_equation_0 = const()[name = tensor<string, []>("op_5023_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5023_cast_fp16 = einsum(equation = var_5023_equation_0, values = (var_4935_cast_fp16_11, var_4995_cast_fp16))[name = tensor<string, []>("op_5023_cast_fp16")];
+            tensor<string, []> var_5025_equation_0 = const()[name = tensor<string, []>("op_5025_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5025_cast_fp16 = einsum(equation = var_5025_equation_0, values = (var_4935_cast_fp16_12, var_4996_cast_fp16))[name = tensor<string, []>("op_5025_cast_fp16")];
+            tensor<string, []> var_5027_equation_0 = const()[name = tensor<string, []>("op_5027_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5027_cast_fp16 = einsum(equation = var_5027_equation_0, values = (var_4935_cast_fp16_13, var_4997_cast_fp16))[name = tensor<string, []>("op_5027_cast_fp16")];
+            tensor<string, []> var_5029_equation_0 = const()[name = tensor<string, []>("op_5029_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5029_cast_fp16 = einsum(equation = var_5029_equation_0, values = (var_4935_cast_fp16_14, var_4998_cast_fp16))[name = tensor<string, []>("op_5029_cast_fp16")];
+            tensor<string, []> var_5031_equation_0 = const()[name = tensor<string, []>("op_5031_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5031_cast_fp16 = einsum(equation = var_5031_equation_0, values = (var_4935_cast_fp16_15, var_4999_cast_fp16))[name = tensor<string, []>("op_5031_cast_fp16")];
+            tensor<bool, []> input_205_interleave_0 = const()[name = tensor<string, []>("input_205_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_205_cast_fp16 = concat(axis = var_4848, interleave = input_205_interleave_0, values = (var_5001_cast_fp16, var_5003_cast_fp16, var_5005_cast_fp16, var_5007_cast_fp16, var_5009_cast_fp16, var_5011_cast_fp16, var_5013_cast_fp16, var_5015_cast_fp16, var_5017_cast_fp16, var_5019_cast_fp16, var_5021_cast_fp16, var_5023_cast_fp16, var_5025_cast_fp16, var_5027_cast_fp16, var_5029_cast_fp16, var_5031_cast_fp16))[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<string, []> var_5040_pad_type_0 = const()[name = tensor<string, []>("op_5040_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5040_strides_0 = const()[name = tensor<string, []>("op_5040_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5040_pad_0 = const()[name = tensor<string, []>("op_5040_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5040_dilations_0 = const()[name = tensor<string, []>("op_5040_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5040_groups_0 = const()[name = tensor<string, []>("op_5040_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(519986752)))];
+            tensor<fp16, [1024]> blocks_20_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522083968)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5040_cast_fp16 = conv(bias = blocks_20_attn_out_bias_to_fp16, dilations = var_5040_dilations_0, groups = var_5040_groups_0, pad = var_5040_pad_0, pad_type = var_5040_pad_type_0, strides = var_5040_strides_0, weight = blocks_20_attn_out_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("op_5040_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = var_5040_cast_fp16)[name = tensor<string, []>("inputs_83_cast_fp16")];
+            tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_207_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_207_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522086080)))];
+            tensor<fp16, [1024]> input_207_beta_0_to_fp16 = const()[name = tensor<string, []>("input_207_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522088192)))];
+            tensor<fp16, []> var_5050_to_fp16 = const()[name = tensor<string, []>("op_5050_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_207_cast_fp16 = layer_norm(axes = input_207_axes_0, beta = input_207_beta_0_to_fp16, epsilon = var_5050_to_fp16, gamma = input_207_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<string, []> input_209_pad_type_0 = const()[name = tensor<string, []>("input_209_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_209_strides_0 = const()[name = tensor<string, []>("input_209_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_209_dilations_0 = const()[name = tensor<string, []>("input_209_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_209_groups_0 = const()[name = tensor<string, []>("input_209_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_20_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522090304)))];
+            tensor<fp16, [4096]> blocks_20_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(530478976)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_209_cast_fp16 = conv(bias = blocks_20_mlp_0_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = blocks_20_mlp_0_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<string, []> input_211_mode_0 = const()[name = tensor<string, []>("input_211_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_211_cast_fp16 = gelu(mode = input_211_mode_0, x = input_209_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<string, []> var_5076_pad_type_0 = const()[name = tensor<string, []>("op_5076_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5076_strides_0 = const()[name = tensor<string, []>("op_5076_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5076_pad_0 = const()[name = tensor<string, []>("op_5076_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5076_dilations_0 = const()[name = tensor<string, []>("op_5076_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5076_groups_0 = const()[name = tensor<string, []>("op_5076_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_20_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(530487232)))];
+            tensor<fp16, [1024]> blocks_20_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538875904)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5076_cast_fp16 = conv(bias = blocks_20_mlp_2_bias_to_fp16, dilations = var_5076_dilations_0, groups = var_5076_groups_0, pad = var_5076_pad_0, pad_type = var_5076_pad_type_0, strides = var_5076_strides_0, weight = blocks_20_mlp_2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor<string, []>("op_5076_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = var_5076_cast_fp16)[name = tensor<string, []>("inputs_85_cast_fp16")];
+            tensor<int32, []> var_5085 = const()[name = tensor<string, []>("op_5085"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_213_axes_0 = const()[name = tensor<string, []>("input_213_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_213_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_213_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538878016)))];
+            tensor<fp16, [1024]> input_213_beta_0_to_fp16 = const()[name = tensor<string, []>("input_213_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538880128)))];
+            tensor<fp16, []> var_5101_to_fp16 = const()[name = tensor<string, []>("op_5101_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_213_cast_fp16 = layer_norm(axes = input_213_axes_0, beta = input_213_beta_0_to_fp16, epsilon = var_5101_to_fp16, gamma = input_213_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<string, []> q_43_pad_type_0 = const()[name = tensor<string, []>("q_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_43_strides_0 = const()[name = tensor<string, []>("q_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_43_pad_0 = const()[name = tensor<string, []>("q_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_43_dilations_0 = const()[name = tensor<string, []>("q_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_43_groups_0 = const()[name = tensor<string, []>("q_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5136_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5136_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538882240)))];
+            tensor<fp16, [1024]> var_5136_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5136_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(540979456)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5136_cast_fp16 = conv(bias = var_5136_bias_0_to_fp16, dilations = q_43_dilations_0, groups = q_43_groups_0, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = q_43_strides_0, weight = var_5136_weight_0_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5136_cast_fp16")];
+            tensor<string, []> k_43_pad_type_0 = const()[name = tensor<string, []>("k_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_43_strides_0 = const()[name = tensor<string, []>("k_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_43_pad_0 = const()[name = tensor<string, []>("k_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_43_dilations_0 = const()[name = tensor<string, []>("k_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_43_groups_0 = const()[name = tensor<string, []>("k_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(540981568)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_43_cast_fp16 = conv(dilations = k_43_dilations_0, groups = k_43_groups_0, pad = k_43_pad_0, pad_type = k_43_pad_type_0, strides = k_43_strides_0, weight = blocks_21_attn_key_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("k_43_cast_fp16")];
+            tensor<string, []> var_5134_pad_type_0 = const()[name = tensor<string, []>("op_5134_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5134_strides_0 = const()[name = tensor<string, []>("op_5134_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5134_pad_0 = const()[name = tensor<string, []>("op_5134_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5134_dilations_0 = const()[name = tensor<string, []>("op_5134_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5134_groups_0 = const()[name = tensor<string, []>("op_5134_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(543078784)))];
+            tensor<fp16, [1024]> blocks_21_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(545176000)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5134_cast_fp16 = conv(bias = blocks_21_attn_value_bias_to_fp16, dilations = var_5134_dilations_0, groups = var_5134_groups_0, pad = var_5134_pad_0, pad_type = var_5134_pad_type_0, strides = var_5134_strides_0, weight = blocks_21_attn_value_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5134_cast_fp16")];
+            tensor<int32, [16]> tile_63 = const()[name = tensor<string, []>("tile_63"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5137_axis_0 = const()[name = tensor<string, []>("op_5137_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_15 = split(axis = var_5137_axis_0, split_sizes = tile_63, x = var_5136_cast_fp16)[name = tensor<string, []>("op_5137_cast_fp16")];
+            tensor<int32, [4]> var_5154_perm_0 = const()[name = tensor<string, []>("op_5154_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_64 = const()[name = tensor<string, []>("tile_64"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5155_axis_0 = const()[name = tensor<string, []>("op_5155_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5154_cast_fp16 = transpose(perm = var_5154_perm_0, x = k_43_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_15 = split(axis = var_5155_axis_0, split_sizes = tile_64, x = var_5154_cast_fp16)[name = tensor<string, []>("op_5155_cast_fp16")];
+            tensor<int32, [16]> tile_65 = const()[name = tensor<string, []>("tile_65"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5172_axis_0 = const()[name = tensor<string, []>("op_5172_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_15 = split(axis = var_5172_axis_0, split_sizes = tile_65, x = var_5134_cast_fp16)[name = tensor<string, []>("op_5172_cast_fp16")];
+            tensor<string, []> aw_673_equation_0 = const()[name = tensor<string, []>("aw_673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_673_cast_fp16 = einsum(equation = aw_673_equation_0, values = (var_5155_cast_fp16_0, var_5137_cast_fp16_0))[name = tensor<string, []>("aw_673_cast_fp16")];
+            tensor<string, []> aw_675_equation_0 = const()[name = tensor<string, []>("aw_675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_675_cast_fp16 = einsum(equation = aw_675_equation_0, values = (var_5155_cast_fp16_1, var_5137_cast_fp16_1))[name = tensor<string, []>("aw_675_cast_fp16")];
+            tensor<string, []> aw_677_equation_0 = const()[name = tensor<string, []>("aw_677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_677_cast_fp16 = einsum(equation = aw_677_equation_0, values = (var_5155_cast_fp16_2, var_5137_cast_fp16_2))[name = tensor<string, []>("aw_677_cast_fp16")];
+            tensor<string, []> aw_679_equation_0 = const()[name = tensor<string, []>("aw_679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_679_cast_fp16 = einsum(equation = aw_679_equation_0, values = (var_5155_cast_fp16_3, var_5137_cast_fp16_3))[name = tensor<string, []>("aw_679_cast_fp16")];
+            tensor<string, []> aw_681_equation_0 = const()[name = tensor<string, []>("aw_681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_681_cast_fp16 = einsum(equation = aw_681_equation_0, values = (var_5155_cast_fp16_4, var_5137_cast_fp16_4))[name = tensor<string, []>("aw_681_cast_fp16")];
+            tensor<string, []> aw_683_equation_0 = const()[name = tensor<string, []>("aw_683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_683_cast_fp16 = einsum(equation = aw_683_equation_0, values = (var_5155_cast_fp16_5, var_5137_cast_fp16_5))[name = tensor<string, []>("aw_683_cast_fp16")];
+            tensor<string, []> aw_685_equation_0 = const()[name = tensor<string, []>("aw_685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_685_cast_fp16 = einsum(equation = aw_685_equation_0, values = (var_5155_cast_fp16_6, var_5137_cast_fp16_6))[name = tensor<string, []>("aw_685_cast_fp16")];
+            tensor<string, []> aw_687_equation_0 = const()[name = tensor<string, []>("aw_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_687_cast_fp16 = einsum(equation = aw_687_equation_0, values = (var_5155_cast_fp16_7, var_5137_cast_fp16_7))[name = tensor<string, []>("aw_687_cast_fp16")];
+            tensor<string, []> aw_689_equation_0 = const()[name = tensor<string, []>("aw_689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_689_cast_fp16 = einsum(equation = aw_689_equation_0, values = (var_5155_cast_fp16_8, var_5137_cast_fp16_8))[name = tensor<string, []>("aw_689_cast_fp16")];
+            tensor<string, []> aw_691_equation_0 = const()[name = tensor<string, []>("aw_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_691_cast_fp16 = einsum(equation = aw_691_equation_0, values = (var_5155_cast_fp16_9, var_5137_cast_fp16_9))[name = tensor<string, []>("aw_691_cast_fp16")];
+            tensor<string, []> aw_693_equation_0 = const()[name = tensor<string, []>("aw_693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_693_cast_fp16 = einsum(equation = aw_693_equation_0, values = (var_5155_cast_fp16_10, var_5137_cast_fp16_10))[name = tensor<string, []>("aw_693_cast_fp16")];
+            tensor<string, []> aw_695_equation_0 = const()[name = tensor<string, []>("aw_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_695_cast_fp16 = einsum(equation = aw_695_equation_0, values = (var_5155_cast_fp16_11, var_5137_cast_fp16_11))[name = tensor<string, []>("aw_695_cast_fp16")];
+            tensor<string, []> aw_697_equation_0 = const()[name = tensor<string, []>("aw_697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_697_cast_fp16 = einsum(equation = aw_697_equation_0, values = (var_5155_cast_fp16_12, var_5137_cast_fp16_12))[name = tensor<string, []>("aw_697_cast_fp16")];
+            tensor<string, []> aw_699_equation_0 = const()[name = tensor<string, []>("aw_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_699_cast_fp16 = einsum(equation = aw_699_equation_0, values = (var_5155_cast_fp16_13, var_5137_cast_fp16_13))[name = tensor<string, []>("aw_699_cast_fp16")];
+            tensor<string, []> aw_701_equation_0 = const()[name = tensor<string, []>("aw_701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_701_cast_fp16 = einsum(equation = aw_701_equation_0, values = (var_5155_cast_fp16_14, var_5137_cast_fp16_14))[name = tensor<string, []>("aw_701_cast_fp16")];
+            tensor<string, []> aw_703_equation_0 = const()[name = tensor<string, []>("aw_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_703_cast_fp16 = einsum(equation = aw_703_equation_0, values = (var_5155_cast_fp16_15, var_5137_cast_fp16_15))[name = tensor<string, []>("aw_703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5221_cast_fp16 = softmax(axis = var_5085, x = aw_673_cast_fp16)[name = tensor<string, []>("op_5221_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5222_cast_fp16 = softmax(axis = var_5085, x = aw_675_cast_fp16)[name = tensor<string, []>("op_5222_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5223_cast_fp16 = softmax(axis = var_5085, x = aw_677_cast_fp16)[name = tensor<string, []>("op_5223_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5224_cast_fp16 = softmax(axis = var_5085, x = aw_679_cast_fp16)[name = tensor<string, []>("op_5224_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5225_cast_fp16 = softmax(axis = var_5085, x = aw_681_cast_fp16)[name = tensor<string, []>("op_5225_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5226_cast_fp16 = softmax(axis = var_5085, x = aw_683_cast_fp16)[name = tensor<string, []>("op_5226_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5227_cast_fp16 = softmax(axis = var_5085, x = aw_685_cast_fp16)[name = tensor<string, []>("op_5227_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5228_cast_fp16 = softmax(axis = var_5085, x = aw_687_cast_fp16)[name = tensor<string, []>("op_5228_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5229_cast_fp16 = softmax(axis = var_5085, x = aw_689_cast_fp16)[name = tensor<string, []>("op_5229_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5230_cast_fp16 = softmax(axis = var_5085, x = aw_691_cast_fp16)[name = tensor<string, []>("op_5230_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5231_cast_fp16 = softmax(axis = var_5085, x = aw_693_cast_fp16)[name = tensor<string, []>("op_5231_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5232_cast_fp16 = softmax(axis = var_5085, x = aw_695_cast_fp16)[name = tensor<string, []>("op_5232_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5233_cast_fp16 = softmax(axis = var_5085, x = aw_697_cast_fp16)[name = tensor<string, []>("op_5233_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5234_cast_fp16 = softmax(axis = var_5085, x = aw_699_cast_fp16)[name = tensor<string, []>("op_5234_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5235_cast_fp16 = softmax(axis = var_5085, x = aw_701_cast_fp16)[name = tensor<string, []>("op_5235_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5236_cast_fp16 = softmax(axis = var_5085, x = aw_703_cast_fp16)[name = tensor<string, []>("op_5236_cast_fp16")];
+            tensor<string, []> var_5238_equation_0 = const()[name = tensor<string, []>("op_5238_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5238_cast_fp16 = einsum(equation = var_5238_equation_0, values = (var_5172_cast_fp16_0, var_5221_cast_fp16))[name = tensor<string, []>("op_5238_cast_fp16")];
+            tensor<string, []> var_5240_equation_0 = const()[name = tensor<string, []>("op_5240_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5240_cast_fp16 = einsum(equation = var_5240_equation_0, values = (var_5172_cast_fp16_1, var_5222_cast_fp16))[name = tensor<string, []>("op_5240_cast_fp16")];
+            tensor<string, []> var_5242_equation_0 = const()[name = tensor<string, []>("op_5242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5242_cast_fp16 = einsum(equation = var_5242_equation_0, values = (var_5172_cast_fp16_2, var_5223_cast_fp16))[name = tensor<string, []>("op_5242_cast_fp16")];
+            tensor<string, []> var_5244_equation_0 = const()[name = tensor<string, []>("op_5244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5244_cast_fp16 = einsum(equation = var_5244_equation_0, values = (var_5172_cast_fp16_3, var_5224_cast_fp16))[name = tensor<string, []>("op_5244_cast_fp16")];
+            tensor<string, []> var_5246_equation_0 = const()[name = tensor<string, []>("op_5246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5246_cast_fp16 = einsum(equation = var_5246_equation_0, values = (var_5172_cast_fp16_4, var_5225_cast_fp16))[name = tensor<string, []>("op_5246_cast_fp16")];
+            tensor<string, []> var_5248_equation_0 = const()[name = tensor<string, []>("op_5248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5248_cast_fp16 = einsum(equation = var_5248_equation_0, values = (var_5172_cast_fp16_5, var_5226_cast_fp16))[name = tensor<string, []>("op_5248_cast_fp16")];
+            tensor<string, []> var_5250_equation_0 = const()[name = tensor<string, []>("op_5250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5250_cast_fp16 = einsum(equation = var_5250_equation_0, values = (var_5172_cast_fp16_6, var_5227_cast_fp16))[name = tensor<string, []>("op_5250_cast_fp16")];
+            tensor<string, []> var_5252_equation_0 = const()[name = tensor<string, []>("op_5252_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5252_cast_fp16 = einsum(equation = var_5252_equation_0, values = (var_5172_cast_fp16_7, var_5228_cast_fp16))[name = tensor<string, []>("op_5252_cast_fp16")];
+            tensor<string, []> var_5254_equation_0 = const()[name = tensor<string, []>("op_5254_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5254_cast_fp16 = einsum(equation = var_5254_equation_0, values = (var_5172_cast_fp16_8, var_5229_cast_fp16))[name = tensor<string, []>("op_5254_cast_fp16")];
+            tensor<string, []> var_5256_equation_0 = const()[name = tensor<string, []>("op_5256_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5256_cast_fp16 = einsum(equation = var_5256_equation_0, values = (var_5172_cast_fp16_9, var_5230_cast_fp16))[name = tensor<string, []>("op_5256_cast_fp16")];
+            tensor<string, []> var_5258_equation_0 = const()[name = tensor<string, []>("op_5258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5258_cast_fp16 = einsum(equation = var_5258_equation_0, values = (var_5172_cast_fp16_10, var_5231_cast_fp16))[name = tensor<string, []>("op_5258_cast_fp16")];
+            tensor<string, []> var_5260_equation_0 = const()[name = tensor<string, []>("op_5260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5260_cast_fp16 = einsum(equation = var_5260_equation_0, values = (var_5172_cast_fp16_11, var_5232_cast_fp16))[name = tensor<string, []>("op_5260_cast_fp16")];
+            tensor<string, []> var_5262_equation_0 = const()[name = tensor<string, []>("op_5262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5262_cast_fp16 = einsum(equation = var_5262_equation_0, values = (var_5172_cast_fp16_12, var_5233_cast_fp16))[name = tensor<string, []>("op_5262_cast_fp16")];
+            tensor<string, []> var_5264_equation_0 = const()[name = tensor<string, []>("op_5264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5264_cast_fp16 = einsum(equation = var_5264_equation_0, values = (var_5172_cast_fp16_13, var_5234_cast_fp16))[name = tensor<string, []>("op_5264_cast_fp16")];
+            tensor<string, []> var_5266_equation_0 = const()[name = tensor<string, []>("op_5266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5266_cast_fp16 = einsum(equation = var_5266_equation_0, values = (var_5172_cast_fp16_14, var_5235_cast_fp16))[name = tensor<string, []>("op_5266_cast_fp16")];
+            tensor<string, []> var_5268_equation_0 = const()[name = tensor<string, []>("op_5268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5268_cast_fp16 = einsum(equation = var_5268_equation_0, values = (var_5172_cast_fp16_15, var_5236_cast_fp16))[name = tensor<string, []>("op_5268_cast_fp16")];
+            tensor<bool, []> input_215_interleave_0 = const()[name = tensor<string, []>("input_215_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_215_cast_fp16 = concat(axis = var_5085, interleave = input_215_interleave_0, values = (var_5238_cast_fp16, var_5240_cast_fp16, var_5242_cast_fp16, var_5244_cast_fp16, var_5246_cast_fp16, var_5248_cast_fp16, var_5250_cast_fp16, var_5252_cast_fp16, var_5254_cast_fp16, var_5256_cast_fp16, var_5258_cast_fp16, var_5260_cast_fp16, var_5262_cast_fp16, var_5264_cast_fp16, var_5266_cast_fp16, var_5268_cast_fp16))[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<string, []> var_5277_pad_type_0 = const()[name = tensor<string, []>("op_5277_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5277_strides_0 = const()[name = tensor<string, []>("op_5277_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5277_pad_0 = const()[name = tensor<string, []>("op_5277_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5277_dilations_0 = const()[name = tensor<string, []>("op_5277_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5277_groups_0 = const()[name = tensor<string, []>("op_5277_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(545178112)))];
+            tensor<fp16, [1024]> blocks_21_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547275328)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5277_cast_fp16 = conv(bias = blocks_21_attn_out_bias_to_fp16, dilations = var_5277_dilations_0, groups = var_5277_groups_0, pad = var_5277_pad_0, pad_type = var_5277_pad_type_0, strides = var_5277_strides_0, weight = blocks_21_attn_out_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("op_5277_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = var_5277_cast_fp16)[name = tensor<string, []>("inputs_87_cast_fp16")];
+            tensor<int32, [1]> input_217_axes_0 = const()[name = tensor<string, []>("input_217_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_217_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_217_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547277440)))];
+            tensor<fp16, [1024]> input_217_beta_0_to_fp16 = const()[name = tensor<string, []>("input_217_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547279552)))];
+            tensor<fp16, []> var_5287_to_fp16 = const()[name = tensor<string, []>("op_5287_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_217_cast_fp16 = layer_norm(axes = input_217_axes_0, beta = input_217_beta_0_to_fp16, epsilon = var_5287_to_fp16, gamma = input_217_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_21_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547281664)))];
+            tensor<fp16, [4096]> blocks_21_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(555670336)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_219_cast_fp16 = conv(bias = blocks_21_mlp_0_bias_to_fp16, dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = blocks_21_mlp_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
+            tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
+            tensor<string, []> var_5313_pad_type_0 = const()[name = tensor<string, []>("op_5313_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5313_strides_0 = const()[name = tensor<string, []>("op_5313_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5313_pad_0 = const()[name = tensor<string, []>("op_5313_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5313_dilations_0 = const()[name = tensor<string, []>("op_5313_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5313_groups_0 = const()[name = tensor<string, []>("op_5313_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_21_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(555678592)))];
+            tensor<fp16, [1024]> blocks_21_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564067264)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5313_cast_fp16 = conv(bias = blocks_21_mlp_2_bias_to_fp16, dilations = var_5313_dilations_0, groups = var_5313_groups_0, pad = var_5313_pad_0, pad_type = var_5313_pad_type_0, strides = var_5313_strides_0, weight = blocks_21_mlp_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("op_5313_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_5313_cast_fp16)[name = tensor<string, []>("inputs_89_cast_fp16")];
+            tensor<int32, []> var_5322 = const()[name = tensor<string, []>("op_5322"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_223_axes_0 = const()[name = tensor<string, []>("input_223_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_223_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_223_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564069376)))];
+            tensor<fp16, [1024]> input_223_beta_0_to_fp16 = const()[name = tensor<string, []>("input_223_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564071488)))];
+            tensor<fp16, []> var_5338_to_fp16 = const()[name = tensor<string, []>("op_5338_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = input_223_beta_0_to_fp16, epsilon = var_5338_to_fp16, gamma = input_223_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
+            tensor<string, []> q_45_pad_type_0 = const()[name = tensor<string, []>("q_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_45_strides_0 = const()[name = tensor<string, []>("q_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_45_pad_0 = const()[name = tensor<string, []>("q_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_45_dilations_0 = const()[name = tensor<string, []>("q_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_45_groups_0 = const()[name = tensor<string, []>("q_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5373_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5373_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564073600)))];
+            tensor<fp16, [1024]> var_5373_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5373_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(566170816)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5373_cast_fp16 = conv(bias = var_5373_bias_0_to_fp16, dilations = q_45_dilations_0, groups = q_45_groups_0, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = q_45_strides_0, weight = var_5373_weight_0_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_5373_cast_fp16")];
+            tensor<string, []> k_45_pad_type_0 = const()[name = tensor<string, []>("k_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_45_strides_0 = const()[name = tensor<string, []>("k_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_45_pad_0 = const()[name = tensor<string, []>("k_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_45_dilations_0 = const()[name = tensor<string, []>("k_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_45_groups_0 = const()[name = tensor<string, []>("k_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(566172928)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_45_cast_fp16 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = blocks_22_attn_key_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
+            tensor<string, []> var_5371_pad_type_0 = const()[name = tensor<string, []>("op_5371_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5371_strides_0 = const()[name = tensor<string, []>("op_5371_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5371_pad_0 = const()[name = tensor<string, []>("op_5371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5371_dilations_0 = const()[name = tensor<string, []>("op_5371_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5371_groups_0 = const()[name = tensor<string, []>("op_5371_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568270144)))];
+            tensor<fp16, [1024]> blocks_22_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(570367360)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5371_cast_fp16 = conv(bias = blocks_22_attn_value_bias_to_fp16, dilations = var_5371_dilations_0, groups = var_5371_groups_0, pad = var_5371_pad_0, pad_type = var_5371_pad_type_0, strides = var_5371_strides_0, weight = blocks_22_attn_value_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_5371_cast_fp16")];
+            tensor<int32, [16]> tile_66 = const()[name = tensor<string, []>("tile_66"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5374_axis_0 = const()[name = tensor<string, []>("op_5374_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_15 = split(axis = var_5374_axis_0, split_sizes = tile_66, x = var_5373_cast_fp16)[name = tensor<string, []>("op_5374_cast_fp16")];
+            tensor<int32, [4]> var_5391_perm_0 = const()[name = tensor<string, []>("op_5391_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_67 = const()[name = tensor<string, []>("tile_67"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5392_axis_0 = const()[name = tensor<string, []>("op_5392_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5391_cast_fp16 = transpose(perm = var_5391_perm_0, x = k_45_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_15 = split(axis = var_5392_axis_0, split_sizes = tile_67, x = var_5391_cast_fp16)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<int32, [16]> tile_68 = const()[name = tensor<string, []>("tile_68"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5409_axis_0 = const()[name = tensor<string, []>("op_5409_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_15 = split(axis = var_5409_axis_0, split_sizes = tile_68, x = var_5371_cast_fp16)[name = tensor<string, []>("op_5409_cast_fp16")];
+            tensor<string, []> aw_705_equation_0 = const()[name = tensor<string, []>("aw_705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_705_cast_fp16 = einsum(equation = aw_705_equation_0, values = (var_5392_cast_fp16_0, var_5374_cast_fp16_0))[name = tensor<string, []>("aw_705_cast_fp16")];
+            tensor<string, []> aw_707_equation_0 = const()[name = tensor<string, []>("aw_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_707_cast_fp16 = einsum(equation = aw_707_equation_0, values = (var_5392_cast_fp16_1, var_5374_cast_fp16_1))[name = tensor<string, []>("aw_707_cast_fp16")];
+            tensor<string, []> aw_709_equation_0 = const()[name = tensor<string, []>("aw_709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_709_cast_fp16 = einsum(equation = aw_709_equation_0, values = (var_5392_cast_fp16_2, var_5374_cast_fp16_2))[name = tensor<string, []>("aw_709_cast_fp16")];
+            tensor<string, []> aw_711_equation_0 = const()[name = tensor<string, []>("aw_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_711_cast_fp16 = einsum(equation = aw_711_equation_0, values = (var_5392_cast_fp16_3, var_5374_cast_fp16_3))[name = tensor<string, []>("aw_711_cast_fp16")];
+            tensor<string, []> aw_713_equation_0 = const()[name = tensor<string, []>("aw_713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_713_cast_fp16 = einsum(equation = aw_713_equation_0, values = (var_5392_cast_fp16_4, var_5374_cast_fp16_4))[name = tensor<string, []>("aw_713_cast_fp16")];
+            tensor<string, []> aw_715_equation_0 = const()[name = tensor<string, []>("aw_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_715_cast_fp16 = einsum(equation = aw_715_equation_0, values = (var_5392_cast_fp16_5, var_5374_cast_fp16_5))[name = tensor<string, []>("aw_715_cast_fp16")];
+            tensor<string, []> aw_717_equation_0 = const()[name = tensor<string, []>("aw_717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_717_cast_fp16 = einsum(equation = aw_717_equation_0, values = (var_5392_cast_fp16_6, var_5374_cast_fp16_6))[name = tensor<string, []>("aw_717_cast_fp16")];
+            tensor<string, []> aw_719_equation_0 = const()[name = tensor<string, []>("aw_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_719_cast_fp16 = einsum(equation = aw_719_equation_0, values = (var_5392_cast_fp16_7, var_5374_cast_fp16_7))[name = tensor<string, []>("aw_719_cast_fp16")];
+            tensor<string, []> aw_721_equation_0 = const()[name = tensor<string, []>("aw_721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_721_cast_fp16 = einsum(equation = aw_721_equation_0, values = (var_5392_cast_fp16_8, var_5374_cast_fp16_8))[name = tensor<string, []>("aw_721_cast_fp16")];
+            tensor<string, []> aw_723_equation_0 = const()[name = tensor<string, []>("aw_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_723_cast_fp16 = einsum(equation = aw_723_equation_0, values = (var_5392_cast_fp16_9, var_5374_cast_fp16_9))[name = tensor<string, []>("aw_723_cast_fp16")];
+            tensor<string, []> aw_725_equation_0 = const()[name = tensor<string, []>("aw_725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_725_cast_fp16 = einsum(equation = aw_725_equation_0, values = (var_5392_cast_fp16_10, var_5374_cast_fp16_10))[name = tensor<string, []>("aw_725_cast_fp16")];
+            tensor<string, []> aw_727_equation_0 = const()[name = tensor<string, []>("aw_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_727_cast_fp16 = einsum(equation = aw_727_equation_0, values = (var_5392_cast_fp16_11, var_5374_cast_fp16_11))[name = tensor<string, []>("aw_727_cast_fp16")];
+            tensor<string, []> aw_729_equation_0 = const()[name = tensor<string, []>("aw_729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_729_cast_fp16 = einsum(equation = aw_729_equation_0, values = (var_5392_cast_fp16_12, var_5374_cast_fp16_12))[name = tensor<string, []>("aw_729_cast_fp16")];
+            tensor<string, []> aw_731_equation_0 = const()[name = tensor<string, []>("aw_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_731_cast_fp16 = einsum(equation = aw_731_equation_0, values = (var_5392_cast_fp16_13, var_5374_cast_fp16_13))[name = tensor<string, []>("aw_731_cast_fp16")];
+            tensor<string, []> aw_733_equation_0 = const()[name = tensor<string, []>("aw_733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_733_cast_fp16 = einsum(equation = aw_733_equation_0, values = (var_5392_cast_fp16_14, var_5374_cast_fp16_14))[name = tensor<string, []>("aw_733_cast_fp16")];
+            tensor<string, []> aw_735_equation_0 = const()[name = tensor<string, []>("aw_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_735_cast_fp16 = einsum(equation = aw_735_equation_0, values = (var_5392_cast_fp16_15, var_5374_cast_fp16_15))[name = tensor<string, []>("aw_735_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5458_cast_fp16 = softmax(axis = var_5322, x = aw_705_cast_fp16)[name = tensor<string, []>("op_5458_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5459_cast_fp16 = softmax(axis = var_5322, x = aw_707_cast_fp16)[name = tensor<string, []>("op_5459_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5460_cast_fp16 = softmax(axis = var_5322, x = aw_709_cast_fp16)[name = tensor<string, []>("op_5460_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5461_cast_fp16 = softmax(axis = var_5322, x = aw_711_cast_fp16)[name = tensor<string, []>("op_5461_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5462_cast_fp16 = softmax(axis = var_5322, x = aw_713_cast_fp16)[name = tensor<string, []>("op_5462_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5463_cast_fp16 = softmax(axis = var_5322, x = aw_715_cast_fp16)[name = tensor<string, []>("op_5463_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5464_cast_fp16 = softmax(axis = var_5322, x = aw_717_cast_fp16)[name = tensor<string, []>("op_5464_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5465_cast_fp16 = softmax(axis = var_5322, x = aw_719_cast_fp16)[name = tensor<string, []>("op_5465_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5466_cast_fp16 = softmax(axis = var_5322, x = aw_721_cast_fp16)[name = tensor<string, []>("op_5466_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5467_cast_fp16 = softmax(axis = var_5322, x = aw_723_cast_fp16)[name = tensor<string, []>("op_5467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5468_cast_fp16 = softmax(axis = var_5322, x = aw_725_cast_fp16)[name = tensor<string, []>("op_5468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5469_cast_fp16 = softmax(axis = var_5322, x = aw_727_cast_fp16)[name = tensor<string, []>("op_5469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5470_cast_fp16 = softmax(axis = var_5322, x = aw_729_cast_fp16)[name = tensor<string, []>("op_5470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5471_cast_fp16 = softmax(axis = var_5322, x = aw_731_cast_fp16)[name = tensor<string, []>("op_5471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5472_cast_fp16 = softmax(axis = var_5322, x = aw_733_cast_fp16)[name = tensor<string, []>("op_5472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5473_cast_fp16 = softmax(axis = var_5322, x = aw_735_cast_fp16)[name = tensor<string, []>("op_5473_cast_fp16")];
+            tensor<string, []> var_5475_equation_0 = const()[name = tensor<string, []>("op_5475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5475_cast_fp16 = einsum(equation = var_5475_equation_0, values = (var_5409_cast_fp16_0, var_5458_cast_fp16))[name = tensor<string, []>("op_5475_cast_fp16")];
+            tensor<string, []> var_5477_equation_0 = const()[name = tensor<string, []>("op_5477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5477_cast_fp16 = einsum(equation = var_5477_equation_0, values = (var_5409_cast_fp16_1, var_5459_cast_fp16))[name = tensor<string, []>("op_5477_cast_fp16")];
+            tensor<string, []> var_5479_equation_0 = const()[name = tensor<string, []>("op_5479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5479_cast_fp16 = einsum(equation = var_5479_equation_0, values = (var_5409_cast_fp16_2, var_5460_cast_fp16))[name = tensor<string, []>("op_5479_cast_fp16")];
+            tensor<string, []> var_5481_equation_0 = const()[name = tensor<string, []>("op_5481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5481_cast_fp16 = einsum(equation = var_5481_equation_0, values = (var_5409_cast_fp16_3, var_5461_cast_fp16))[name = tensor<string, []>("op_5481_cast_fp16")];
+            tensor<string, []> var_5483_equation_0 = const()[name = tensor<string, []>("op_5483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5483_cast_fp16 = einsum(equation = var_5483_equation_0, values = (var_5409_cast_fp16_4, var_5462_cast_fp16))[name = tensor<string, []>("op_5483_cast_fp16")];
+            tensor<string, []> var_5485_equation_0 = const()[name = tensor<string, []>("op_5485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5485_cast_fp16 = einsum(equation = var_5485_equation_0, values = (var_5409_cast_fp16_5, var_5463_cast_fp16))[name = tensor<string, []>("op_5485_cast_fp16")];
+            tensor<string, []> var_5487_equation_0 = const()[name = tensor<string, []>("op_5487_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5487_cast_fp16 = einsum(equation = var_5487_equation_0, values = (var_5409_cast_fp16_6, var_5464_cast_fp16))[name = tensor<string, []>("op_5487_cast_fp16")];
+            tensor<string, []> var_5489_equation_0 = const()[name = tensor<string, []>("op_5489_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5489_cast_fp16 = einsum(equation = var_5489_equation_0, values = (var_5409_cast_fp16_7, var_5465_cast_fp16))[name = tensor<string, []>("op_5489_cast_fp16")];
+            tensor<string, []> var_5491_equation_0 = const()[name = tensor<string, []>("op_5491_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5491_cast_fp16 = einsum(equation = var_5491_equation_0, values = (var_5409_cast_fp16_8, var_5466_cast_fp16))[name = tensor<string, []>("op_5491_cast_fp16")];
+            tensor<string, []> var_5493_equation_0 = const()[name = tensor<string, []>("op_5493_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5493_cast_fp16 = einsum(equation = var_5493_equation_0, values = (var_5409_cast_fp16_9, var_5467_cast_fp16))[name = tensor<string, []>("op_5493_cast_fp16")];
+            tensor<string, []> var_5495_equation_0 = const()[name = tensor<string, []>("op_5495_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5495_cast_fp16 = einsum(equation = var_5495_equation_0, values = (var_5409_cast_fp16_10, var_5468_cast_fp16))[name = tensor<string, []>("op_5495_cast_fp16")];
+            tensor<string, []> var_5497_equation_0 = const()[name = tensor<string, []>("op_5497_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5497_cast_fp16 = einsum(equation = var_5497_equation_0, values = (var_5409_cast_fp16_11, var_5469_cast_fp16))[name = tensor<string, []>("op_5497_cast_fp16")];
+            tensor<string, []> var_5499_equation_0 = const()[name = tensor<string, []>("op_5499_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5499_cast_fp16 = einsum(equation = var_5499_equation_0, values = (var_5409_cast_fp16_12, var_5470_cast_fp16))[name = tensor<string, []>("op_5499_cast_fp16")];
+            tensor<string, []> var_5501_equation_0 = const()[name = tensor<string, []>("op_5501_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5501_cast_fp16 = einsum(equation = var_5501_equation_0, values = (var_5409_cast_fp16_13, var_5471_cast_fp16))[name = tensor<string, []>("op_5501_cast_fp16")];
+            tensor<string, []> var_5503_equation_0 = const()[name = tensor<string, []>("op_5503_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5503_cast_fp16 = einsum(equation = var_5503_equation_0, values = (var_5409_cast_fp16_14, var_5472_cast_fp16))[name = tensor<string, []>("op_5503_cast_fp16")];
+            tensor<string, []> var_5505_equation_0 = const()[name = tensor<string, []>("op_5505_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5505_cast_fp16 = einsum(equation = var_5505_equation_0, values = (var_5409_cast_fp16_15, var_5473_cast_fp16))[name = tensor<string, []>("op_5505_cast_fp16")];
+            tensor<bool, []> input_225_interleave_0 = const()[name = tensor<string, []>("input_225_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_225_cast_fp16 = concat(axis = var_5322, interleave = input_225_interleave_0, values = (var_5475_cast_fp16, var_5477_cast_fp16, var_5479_cast_fp16, var_5481_cast_fp16, var_5483_cast_fp16, var_5485_cast_fp16, var_5487_cast_fp16, var_5489_cast_fp16, var_5491_cast_fp16, var_5493_cast_fp16, var_5495_cast_fp16, var_5497_cast_fp16, var_5499_cast_fp16, var_5501_cast_fp16, var_5503_cast_fp16, var_5505_cast_fp16))[name = tensor<string, []>("input_225_cast_fp16")];
+            tensor<string, []> var_5514_pad_type_0 = const()[name = tensor<string, []>("op_5514_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5514_strides_0 = const()[name = tensor<string, []>("op_5514_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5514_pad_0 = const()[name = tensor<string, []>("op_5514_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5514_dilations_0 = const()[name = tensor<string, []>("op_5514_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5514_groups_0 = const()[name = tensor<string, []>("op_5514_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(570369472)))];
+            tensor<fp16, [1024]> blocks_22_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572466688)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5514_cast_fp16 = conv(bias = blocks_22_attn_out_bias_to_fp16, dilations = var_5514_dilations_0, groups = var_5514_groups_0, pad = var_5514_pad_0, pad_type = var_5514_pad_type_0, strides = var_5514_strides_0, weight = blocks_22_attn_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("op_5514_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = var_5514_cast_fp16)[name = tensor<string, []>("inputs_91_cast_fp16")];
+            tensor<int32, [1]> input_227_axes_0 = const()[name = tensor<string, []>("input_227_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_227_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572468800)))];
+            tensor<fp16, [1024]> input_227_beta_0_to_fp16 = const()[name = tensor<string, []>("input_227_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572470912)))];
+            tensor<fp16, []> var_5524_to_fp16 = const()[name = tensor<string, []>("op_5524_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = input_227_beta_0_to_fp16, epsilon = var_5524_to_fp16, gamma = input_227_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<string, []> input_229_pad_type_0 = const()[name = tensor<string, []>("input_229_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_229_strides_0 = const()[name = tensor<string, []>("input_229_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_229_pad_0 = const()[name = tensor<string, []>("input_229_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_229_dilations_0 = const()[name = tensor<string, []>("input_229_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_229_groups_0 = const()[name = tensor<string, []>("input_229_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_22_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572473024)))];
+            tensor<fp16, [4096]> blocks_22_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(580861696)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_229_cast_fp16 = conv(bias = blocks_22_mlp_0_bias_to_fp16, dilations = input_229_dilations_0, groups = input_229_groups_0, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = input_229_strides_0, weight = blocks_22_mlp_0_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<string, []> input_231_mode_0 = const()[name = tensor<string, []>("input_231_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<string, []> var_5550_pad_type_0 = const()[name = tensor<string, []>("op_5550_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5550_strides_0 = const()[name = tensor<string, []>("op_5550_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5550_pad_0 = const()[name = tensor<string, []>("op_5550_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5550_dilations_0 = const()[name = tensor<string, []>("op_5550_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5550_groups_0 = const()[name = tensor<string, []>("op_5550_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_22_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(580869952)))];
+            tensor<fp16, [1024]> blocks_22_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589258624)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5550_cast_fp16 = conv(bias = blocks_22_mlp_2_bias_to_fp16, dilations = var_5550_dilations_0, groups = var_5550_groups_0, pad = var_5550_pad_0, pad_type = var_5550_pad_type_0, strides = var_5550_strides_0, weight = blocks_22_mlp_2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor<string, []>("op_5550_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = var_5550_cast_fp16)[name = tensor<string, []>("inputs_93_cast_fp16")];
+            tensor<int32, []> var_5559 = const()[name = tensor<string, []>("op_5559"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_233_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_233_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589260736)))];
+            tensor<fp16, [1024]> input_233_beta_0_to_fp16 = const()[name = tensor<string, []>("input_233_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589262848)))];
+            tensor<fp16, []> var_5575_to_fp16 = const()[name = tensor<string, []>("op_5575_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = input_233_beta_0_to_fp16, epsilon = var_5575_to_fp16, gamma = input_233_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5610_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5610_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589264960)))];
+            tensor<fp16, [1024]> var_5610_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5610_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591362176)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5610_cast_fp16 = conv(bias = var_5610_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_5610_weight_0_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_5610_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591364288)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_23_attn_key_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_5608_pad_type_0 = const()[name = tensor<string, []>("op_5608_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5608_strides_0 = const()[name = tensor<string, []>("op_5608_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5608_pad_0 = const()[name = tensor<string, []>("op_5608_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5608_dilations_0 = const()[name = tensor<string, []>("op_5608_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5608_groups_0 = const()[name = tensor<string, []>("op_5608_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(593461504)))];
+            tensor<fp16, [1024]> blocks_23_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(595558720)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5608_cast_fp16 = conv(bias = blocks_23_attn_value_bias_to_fp16, dilations = var_5608_dilations_0, groups = var_5608_groups_0, pad = var_5608_pad_0, pad_type = var_5608_pad_type_0, strides = var_5608_strides_0, weight = blocks_23_attn_value_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_5608_cast_fp16")];
+            tensor<int32, [16]> tile_69 = const()[name = tensor<string, []>("tile_69"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5611_axis_0 = const()[name = tensor<string, []>("op_5611_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_15 = split(axis = var_5611_axis_0, split_sizes = tile_69, x = var_5610_cast_fp16)[name = tensor<string, []>("op_5611_cast_fp16")];
+            tensor<int32, [4]> var_5628_perm_0 = const()[name = tensor<string, []>("op_5628_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_70 = const()[name = tensor<string, []>("tile_70"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5629_axis_0 = const()[name = tensor<string, []>("op_5629_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5628_cast_fp16 = transpose(perm = var_5628_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_15 = split(axis = var_5629_axis_0, split_sizes = tile_70, x = var_5628_cast_fp16)[name = tensor<string, []>("op_5629_cast_fp16")];
+            tensor<int32, [16]> tile_71 = const()[name = tensor<string, []>("tile_71"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5646_axis_0 = const()[name = tensor<string, []>("op_5646_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_15 = split(axis = var_5646_axis_0, split_sizes = tile_71, x = var_5608_cast_fp16)[name = tensor<string, []>("op_5646_cast_fp16")];
+            tensor<string, []> aw_737_equation_0 = const()[name = tensor<string, []>("aw_737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_737_cast_fp16 = einsum(equation = aw_737_equation_0, values = (var_5629_cast_fp16_0, var_5611_cast_fp16_0))[name = tensor<string, []>("aw_737_cast_fp16")];
+            tensor<string, []> aw_739_equation_0 = const()[name = tensor<string, []>("aw_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_739_cast_fp16 = einsum(equation = aw_739_equation_0, values = (var_5629_cast_fp16_1, var_5611_cast_fp16_1))[name = tensor<string, []>("aw_739_cast_fp16")];
+            tensor<string, []> aw_741_equation_0 = const()[name = tensor<string, []>("aw_741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_741_cast_fp16 = einsum(equation = aw_741_equation_0, values = (var_5629_cast_fp16_2, var_5611_cast_fp16_2))[name = tensor<string, []>("aw_741_cast_fp16")];
+            tensor<string, []> aw_743_equation_0 = const()[name = tensor<string, []>("aw_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_743_cast_fp16 = einsum(equation = aw_743_equation_0, values = (var_5629_cast_fp16_3, var_5611_cast_fp16_3))[name = tensor<string, []>("aw_743_cast_fp16")];
+            tensor<string, []> aw_745_equation_0 = const()[name = tensor<string, []>("aw_745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_745_cast_fp16 = einsum(equation = aw_745_equation_0, values = (var_5629_cast_fp16_4, var_5611_cast_fp16_4))[name = tensor<string, []>("aw_745_cast_fp16")];
+            tensor<string, []> aw_747_equation_0 = const()[name = tensor<string, []>("aw_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_747_cast_fp16 = einsum(equation = aw_747_equation_0, values = (var_5629_cast_fp16_5, var_5611_cast_fp16_5))[name = tensor<string, []>("aw_747_cast_fp16")];
+            tensor<string, []> aw_749_equation_0 = const()[name = tensor<string, []>("aw_749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_749_cast_fp16 = einsum(equation = aw_749_equation_0, values = (var_5629_cast_fp16_6, var_5611_cast_fp16_6))[name = tensor<string, []>("aw_749_cast_fp16")];
+            tensor<string, []> aw_751_equation_0 = const()[name = tensor<string, []>("aw_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_751_cast_fp16 = einsum(equation = aw_751_equation_0, values = (var_5629_cast_fp16_7, var_5611_cast_fp16_7))[name = tensor<string, []>("aw_751_cast_fp16")];
+            tensor<string, []> aw_753_equation_0 = const()[name = tensor<string, []>("aw_753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_753_cast_fp16 = einsum(equation = aw_753_equation_0, values = (var_5629_cast_fp16_8, var_5611_cast_fp16_8))[name = tensor<string, []>("aw_753_cast_fp16")];
+            tensor<string, []> aw_755_equation_0 = const()[name = tensor<string, []>("aw_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_755_cast_fp16 = einsum(equation = aw_755_equation_0, values = (var_5629_cast_fp16_9, var_5611_cast_fp16_9))[name = tensor<string, []>("aw_755_cast_fp16")];
+            tensor<string, []> aw_757_equation_0 = const()[name = tensor<string, []>("aw_757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_757_cast_fp16 = einsum(equation = aw_757_equation_0, values = (var_5629_cast_fp16_10, var_5611_cast_fp16_10))[name = tensor<string, []>("aw_757_cast_fp16")];
+            tensor<string, []> aw_759_equation_0 = const()[name = tensor<string, []>("aw_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_759_cast_fp16 = einsum(equation = aw_759_equation_0, values = (var_5629_cast_fp16_11, var_5611_cast_fp16_11))[name = tensor<string, []>("aw_759_cast_fp16")];
+            tensor<string, []> aw_761_equation_0 = const()[name = tensor<string, []>("aw_761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_761_cast_fp16 = einsum(equation = aw_761_equation_0, values = (var_5629_cast_fp16_12, var_5611_cast_fp16_12))[name = tensor<string, []>("aw_761_cast_fp16")];
+            tensor<string, []> aw_763_equation_0 = const()[name = tensor<string, []>("aw_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_763_cast_fp16 = einsum(equation = aw_763_equation_0, values = (var_5629_cast_fp16_13, var_5611_cast_fp16_13))[name = tensor<string, []>("aw_763_cast_fp16")];
+            tensor<string, []> aw_765_equation_0 = const()[name = tensor<string, []>("aw_765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_765_cast_fp16 = einsum(equation = aw_765_equation_0, values = (var_5629_cast_fp16_14, var_5611_cast_fp16_14))[name = tensor<string, []>("aw_765_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_5629_cast_fp16_15, var_5611_cast_fp16_15))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5695_cast_fp16 = softmax(axis = var_5559, x = aw_737_cast_fp16)[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5696_cast_fp16 = softmax(axis = var_5559, x = aw_739_cast_fp16)[name = tensor<string, []>("op_5696_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5697_cast_fp16 = softmax(axis = var_5559, x = aw_741_cast_fp16)[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5698_cast_fp16 = softmax(axis = var_5559, x = aw_743_cast_fp16)[name = tensor<string, []>("op_5698_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5699_cast_fp16 = softmax(axis = var_5559, x = aw_745_cast_fp16)[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5700_cast_fp16 = softmax(axis = var_5559, x = aw_747_cast_fp16)[name = tensor<string, []>("op_5700_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5701_cast_fp16 = softmax(axis = var_5559, x = aw_749_cast_fp16)[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5702_cast_fp16 = softmax(axis = var_5559, x = aw_751_cast_fp16)[name = tensor<string, []>("op_5702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5703_cast_fp16 = softmax(axis = var_5559, x = aw_753_cast_fp16)[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5704_cast_fp16 = softmax(axis = var_5559, x = aw_755_cast_fp16)[name = tensor<string, []>("op_5704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5705_cast_fp16 = softmax(axis = var_5559, x = aw_757_cast_fp16)[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5706_cast_fp16 = softmax(axis = var_5559, x = aw_759_cast_fp16)[name = tensor<string, []>("op_5706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5707_cast_fp16 = softmax(axis = var_5559, x = aw_761_cast_fp16)[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5708_cast_fp16 = softmax(axis = var_5559, x = aw_763_cast_fp16)[name = tensor<string, []>("op_5708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5709_cast_fp16 = softmax(axis = var_5559, x = aw_765_cast_fp16)[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5710_cast_fp16 = softmax(axis = var_5559, x = aw_cast_fp16)[name = tensor<string, []>("op_5710_cast_fp16")];
+            tensor<string, []> var_5712_equation_0 = const()[name = tensor<string, []>("op_5712_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5712_cast_fp16 = einsum(equation = var_5712_equation_0, values = (var_5646_cast_fp16_0, var_5695_cast_fp16))[name = tensor<string, []>("op_5712_cast_fp16")];
+            tensor<string, []> var_5714_equation_0 = const()[name = tensor<string, []>("op_5714_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5714_cast_fp16 = einsum(equation = var_5714_equation_0, values = (var_5646_cast_fp16_1, var_5696_cast_fp16))[name = tensor<string, []>("op_5714_cast_fp16")];
+            tensor<string, []> var_5716_equation_0 = const()[name = tensor<string, []>("op_5716_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5716_cast_fp16 = einsum(equation = var_5716_equation_0, values = (var_5646_cast_fp16_2, var_5697_cast_fp16))[name = tensor<string, []>("op_5716_cast_fp16")];
+            tensor<string, []> var_5718_equation_0 = const()[name = tensor<string, []>("op_5718_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5718_cast_fp16 = einsum(equation = var_5718_equation_0, values = (var_5646_cast_fp16_3, var_5698_cast_fp16))[name = tensor<string, []>("op_5718_cast_fp16")];
+            tensor<string, []> var_5720_equation_0 = const()[name = tensor<string, []>("op_5720_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5720_cast_fp16 = einsum(equation = var_5720_equation_0, values = (var_5646_cast_fp16_4, var_5699_cast_fp16))[name = tensor<string, []>("op_5720_cast_fp16")];
+            tensor<string, []> var_5722_equation_0 = const()[name = tensor<string, []>("op_5722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5722_cast_fp16 = einsum(equation = var_5722_equation_0, values = (var_5646_cast_fp16_5, var_5700_cast_fp16))[name = tensor<string, []>("op_5722_cast_fp16")];
+            tensor<string, []> var_5724_equation_0 = const()[name = tensor<string, []>("op_5724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5724_cast_fp16 = einsum(equation = var_5724_equation_0, values = (var_5646_cast_fp16_6, var_5701_cast_fp16))[name = tensor<string, []>("op_5724_cast_fp16")];
+            tensor<string, []> var_5726_equation_0 = const()[name = tensor<string, []>("op_5726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5726_cast_fp16 = einsum(equation = var_5726_equation_0, values = (var_5646_cast_fp16_7, var_5702_cast_fp16))[name = tensor<string, []>("op_5726_cast_fp16")];
+            tensor<string, []> var_5728_equation_0 = const()[name = tensor<string, []>("op_5728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5728_cast_fp16 = einsum(equation = var_5728_equation_0, values = (var_5646_cast_fp16_8, var_5703_cast_fp16))[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<string, []> var_5730_equation_0 = const()[name = tensor<string, []>("op_5730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5730_cast_fp16 = einsum(equation = var_5730_equation_0, values = (var_5646_cast_fp16_9, var_5704_cast_fp16))[name = tensor<string, []>("op_5730_cast_fp16")];
+            tensor<string, []> var_5732_equation_0 = const()[name = tensor<string, []>("op_5732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5732_cast_fp16 = einsum(equation = var_5732_equation_0, values = (var_5646_cast_fp16_10, var_5705_cast_fp16))[name = tensor<string, []>("op_5732_cast_fp16")];
+            tensor<string, []> var_5734_equation_0 = const()[name = tensor<string, []>("op_5734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5734_cast_fp16 = einsum(equation = var_5734_equation_0, values = (var_5646_cast_fp16_11, var_5706_cast_fp16))[name = tensor<string, []>("op_5734_cast_fp16")];
+            tensor<string, []> var_5736_equation_0 = const()[name = tensor<string, []>("op_5736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5736_cast_fp16 = einsum(equation = var_5736_equation_0, values = (var_5646_cast_fp16_12, var_5707_cast_fp16))[name = tensor<string, []>("op_5736_cast_fp16")];
+            tensor<string, []> var_5738_equation_0 = const()[name = tensor<string, []>("op_5738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5738_cast_fp16 = einsum(equation = var_5738_equation_0, values = (var_5646_cast_fp16_13, var_5708_cast_fp16))[name = tensor<string, []>("op_5738_cast_fp16")];
+            tensor<string, []> var_5740_equation_0 = const()[name = tensor<string, []>("op_5740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5740_cast_fp16 = einsum(equation = var_5740_equation_0, values = (var_5646_cast_fp16_14, var_5709_cast_fp16))[name = tensor<string, []>("op_5740_cast_fp16")];
+            tensor<string, []> var_5742_equation_0 = const()[name = tensor<string, []>("op_5742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5742_cast_fp16 = einsum(equation = var_5742_equation_0, values = (var_5646_cast_fp16_15, var_5710_cast_fp16))[name = tensor<string, []>("op_5742_cast_fp16")];
+            tensor<bool, []> input_235_interleave_0 = const()[name = tensor<string, []>("input_235_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_235_cast_fp16 = concat(axis = var_5559, interleave = input_235_interleave_0, values = (var_5712_cast_fp16, var_5714_cast_fp16, var_5716_cast_fp16, var_5718_cast_fp16, var_5720_cast_fp16, var_5722_cast_fp16, var_5724_cast_fp16, var_5726_cast_fp16, var_5728_cast_fp16, var_5730_cast_fp16, var_5732_cast_fp16, var_5734_cast_fp16, var_5736_cast_fp16, var_5738_cast_fp16, var_5740_cast_fp16, var_5742_cast_fp16))[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<string, []> var_5751_pad_type_0 = const()[name = tensor<string, []>("op_5751_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5751_strides_0 = const()[name = tensor<string, []>("op_5751_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5751_pad_0 = const()[name = tensor<string, []>("op_5751_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5751_dilations_0 = const()[name = tensor<string, []>("op_5751_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5751_groups_0 = const()[name = tensor<string, []>("op_5751_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(595560832)))];
+            tensor<fp16, [1024]> blocks_23_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597658048)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5751_cast_fp16 = conv(bias = blocks_23_attn_out_bias_to_fp16, dilations = var_5751_dilations_0, groups = var_5751_groups_0, pad = var_5751_pad_0, pad_type = var_5751_pad_type_0, strides = var_5751_strides_0, weight = blocks_23_attn_out_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("op_5751_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = var_5751_cast_fp16)[name = tensor<string, []>("inputs_95_cast_fp16")];
+            tensor<int32, [1]> input_237_axes_0 = const()[name = tensor<string, []>("input_237_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_237_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_237_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597660160)))];
+            tensor<fp16, [1024]> input_237_beta_0_to_fp16 = const()[name = tensor<string, []>("input_237_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597662272)))];
+            tensor<fp16, []> var_5761_to_fp16 = const()[name = tensor<string, []>("op_5761_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = input_237_beta_0_to_fp16, epsilon = var_5761_to_fp16, gamma = input_237_gamma_0_to_fp16, x = inputs_95_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_23_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597664384)))];
+            tensor<fp16, [4096]> blocks_23_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(606053056)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_239_cast_fp16 = conv(bias = blocks_23_mlp_0_bias_to_fp16, dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = blocks_23_mlp_0_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_5787_pad_type_0 = const()[name = tensor<string, []>("op_5787_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5787_strides_0 = const()[name = tensor<string, []>("op_5787_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5787_pad_0 = const()[name = tensor<string, []>("op_5787_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5787_dilations_0 = const()[name = tensor<string, []>("op_5787_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5787_groups_0 = const()[name = tensor<string, []>("op_5787_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_23_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(606061312)))];
+            tensor<fp16, [1024]> blocks_23_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614449984)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5787_cast_fp16 = conv(bias = blocks_23_mlp_2_bias_to_fp16, dilations = var_5787_dilations_0, groups = var_5787_groups_0, pad = var_5787_pad_0, pad_type = var_5787_pad_type_0, strides = var_5787_strides_0, weight = blocks_23_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_5787_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_5787_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614452096)))];
+            tensor<fp16, [1024]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614454208)))];
+            tensor<fp16, []> var_5801_to_fp16 = const()[name = tensor<string, []>("op_5801_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_5801_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_5812_axes_0 = const()[name = tensor<string, []>("op_5812_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1024, 1500]> var_5812_cast_fp16 = squeeze(axes = var_5812_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_5812_cast_fp16")];
+            tensor<int32, [3]> var_5815_perm_0 = const()[name = tensor<string, []>("op_5815_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_5815_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_5815_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 1024]> var_5815_cast_fp16 = transpose(perm = var_5815_perm_0, x = var_5812_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 1024]> output = cast(dtype = var_5815_cast_fp16_to_fp32_dtype_0, x = var_5815_cast_fp16)[name = tensor<string, []>("cast_99")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/medium.en/ggml-medium.en-encoder.mlmodelc/weights/weight.bin b/medium.en/ggml-medium.en-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a1ff84b198cc02ec2b63b8ee094dce3f239565c5
--- /dev/null
+++ b/medium.en/ggml-medium.en-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74029c2746f01f4f43ebbcdaa98dfb6e0597eb7e3d4008fdef72c2113a4d2483
+size 614456320
diff --git a/medium.en/ggml-medium.en.bin b/medium.en/ggml-medium.en.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8d7f988b60916d7f7e7feee9897c037a09b2f85
--- /dev/null
+++ b/medium.en/ggml-medium.en.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc37e93478338ec7700281a7ac30a10128929eb8f427dda2e865faa8f6da4356
+size 1533774781
diff --git a/medium/.DS_Store b/medium/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..79ccbf9bfb80488f459329a093e7ac4031bea9cc
Binary files /dev/null and b/medium/.DS_Store differ
diff --git a/medium/ggml-medium-encoder.mlmodelc/analytics/coremldata.bin b/medium/ggml-medium-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..328e5c6707519542579df61365ef4c4391c16353
--- /dev/null
+++ b/medium/ggml-medium-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:423209035d398f1e6945478cc49cf8e83022a3dc4e00d735c54289b3f62cdf89
+size 243
diff --git a/medium/ggml-medium-encoder.mlmodelc/coremldata.bin b/medium/ggml-medium-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a7e2b54475c4dbc86435a318e0608d48945bae16
--- /dev/null
+++ b/medium/ggml-medium-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d20a4fe17a031efc213c5c295df6967c6e87eba9cca3f07fa63c2beb835ca420
+size 320
diff --git a/medium/ggml-medium-encoder.mlmodelc/metadata.json b/medium/ggml-medium-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..a4281c486a43b6865cadcf5135988a026525daaf
--- /dev/null
+++ b/medium/ggml-medium-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 1024)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 1024]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 24,
+      "Gelu" : 26,
+      "LayerNorm" : 49,
+      "Transpose" : 25,
+      "Softmax" : 384,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 49,
+      "Einsum" : 768,
+      "ExpandDims" : 1,
+      "Split" : 72,
+      "Conv" : 146
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_medium",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/medium/ggml-medium-encoder.mlmodelc/model.mil b/medium/ggml-medium-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..e6dfb1231cc22b9b4af2039b332f526ac8800584
--- /dev/null
+++ b/medium/ggml-medium-encoder.mlmodelc/model.mil
@@ -0,0 +1,3763 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_68_pad_type_0 = const()[name = tensor<string, []>("op_68_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_68_pad_0 = const()[name = tensor<string, []>("op_68_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_68_strides_0 = const()[name = tensor<string, []>("op_68_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_68_dilations_0 = const()[name = tensor<string, []>("op_68_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_68_groups_0 = const()[name = tensor<string, []>("op_68_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1024, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [1024, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1024]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(491648)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_100")];
+            tensor<fp16, [1, 1024, 3000]> var_68_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_68_dilations_0, groups = var_68_groups_0, pad = var_68_pad_0, pad_type = var_68_pad_type_0, strides = var_68_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_68_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1024, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_68_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_86_pad_type_0 = const()[name = tensor<string, []>("op_86_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_86_pad_0 = const()[name = tensor<string, []>("op_86_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_86_strides_0 = const()[name = tensor<string, []>("op_86_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_86_dilations_0 = const()[name = tensor<string, []>("op_86_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_86_groups_0 = const()[name = tensor<string, []>("op_86_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1024, 1024, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(493760)))];
+            tensor<fp16, [1024]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6785280)))];
+            tensor<fp16, [1, 1024, 1500]> var_86_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_86_dilations_0, groups = var_86_groups_0, pad = var_86_pad_0, pad_type = var_86_pad_type_0, strides = var_86_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_86_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1024, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_86_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [1024, 1500]> var_91_to_fp16 = const()[name = tensor<string, []>("op_91_to_fp16"), val = tensor<fp16, [1024, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6787392)))];
+            tensor<fp16, [1, 1024, 1500]> var_93_cast_fp16 = add(x = x_3_cast_fp16, y = var_91_to_fp16)[name = tensor<string, []>("op_93_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_93_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_108 = const()[name = tensor<string, []>("op_108"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9859456)))];
+            tensor<fp16, [1024]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9861568)))];
+            tensor<fp16, []> var_124_to_fp16 = const()[name = tensor<string, []>("op_124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_124_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_159_weight_0_to_fp16 = const()[name = tensor<string, []>("op_159_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9863680)))];
+            tensor<fp16, [1024]> var_159_bias_0_to_fp16 = const()[name = tensor<string, []>("op_159_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11960896)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_159_cast_fp16 = conv(bias = var_159_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_159_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_159_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11963008)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_157_pad_type_0 = const()[name = tensor<string, []>("op_157_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_157_strides_0 = const()[name = tensor<string, []>("op_157_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_157_pad_0 = const()[name = tensor<string, []>("op_157_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_157_dilations_0 = const()[name = tensor<string, []>("op_157_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_157_groups_0 = const()[name = tensor<string, []>("op_157_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14060224)))];
+            tensor<fp16, [1024]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16157440)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_157_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_157_dilations_0, groups = var_157_groups_0, pad = var_157_pad_0, pad_type = var_157_pad_type_0, strides = var_157_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
+            tensor<int32, [16]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_160_axis_0 = const()[name = tensor<string, []>("op_160_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_160_cast_fp16_15 = split(axis = var_160_axis_0, split_sizes = tile_0, x = var_159_cast_fp16)[name = tensor<string, []>("op_160_cast_fp16")];
+            tensor<int32, [4]> var_177_perm_0 = const()[name = tensor<string, []>("op_177_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_178_axis_0 = const()[name = tensor<string, []>("op_178_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_177_cast_fp16 = transpose(perm = var_177_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_24")];
+            tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_178_cast_fp16_15 = split(axis = var_178_axis_0, split_sizes = tile_1, x = var_177_cast_fp16)[name = tensor<string, []>("op_178_cast_fp16")];
+            tensor<int32, [16]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_195_axis_0 = const()[name = tensor<string, []>("op_195_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_195_cast_fp16_15 = split(axis = var_195_axis_0, split_sizes = tile_2, x = var_157_cast_fp16)[name = tensor<string, []>("op_195_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_178_cast_fp16_0, var_160_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_178_cast_fp16_1, var_160_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_178_cast_fp16_2, var_160_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_178_cast_fp16_3, var_160_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_178_cast_fp16_4, var_160_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_178_cast_fp16_5, var_160_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_178_cast_fp16_6, var_160_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_178_cast_fp16_7, var_160_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_178_cast_fp16_8, var_160_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_178_cast_fp16_9, var_160_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_178_cast_fp16_10, var_160_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_178_cast_fp16_11, var_160_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_178_cast_fp16_12, var_160_cast_fp16_12))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_178_cast_fp16_13, var_160_cast_fp16_13))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_178_cast_fp16_14, var_160_cast_fp16_14))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_178_cast_fp16_15, var_160_cast_fp16_15))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_244_cast_fp16 = softmax(axis = var_108, x = aw_1_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_245_cast_fp16 = softmax(axis = var_108, x = aw_3_cast_fp16)[name = tensor<string, []>("op_245_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_246_cast_fp16 = softmax(axis = var_108, x = aw_5_cast_fp16)[name = tensor<string, []>("op_246_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_247_cast_fp16 = softmax(axis = var_108, x = aw_7_cast_fp16)[name = tensor<string, []>("op_247_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_248_cast_fp16 = softmax(axis = var_108, x = aw_9_cast_fp16)[name = tensor<string, []>("op_248_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_249_cast_fp16 = softmax(axis = var_108, x = aw_11_cast_fp16)[name = tensor<string, []>("op_249_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_250_cast_fp16 = softmax(axis = var_108, x = aw_13_cast_fp16)[name = tensor<string, []>("op_250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_251_cast_fp16 = softmax(axis = var_108, x = aw_15_cast_fp16)[name = tensor<string, []>("op_251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_252_cast_fp16 = softmax(axis = var_108, x = aw_17_cast_fp16)[name = tensor<string, []>("op_252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_253_cast_fp16 = softmax(axis = var_108, x = aw_19_cast_fp16)[name = tensor<string, []>("op_253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_254_cast_fp16 = softmax(axis = var_108, x = aw_21_cast_fp16)[name = tensor<string, []>("op_254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_255_cast_fp16 = softmax(axis = var_108, x = aw_23_cast_fp16)[name = tensor<string, []>("op_255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_256_cast_fp16 = softmax(axis = var_108, x = aw_25_cast_fp16)[name = tensor<string, []>("op_256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_257_cast_fp16 = softmax(axis = var_108, x = aw_27_cast_fp16)[name = tensor<string, []>("op_257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_258_cast_fp16 = softmax(axis = var_108, x = aw_29_cast_fp16)[name = tensor<string, []>("op_258_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_259_cast_fp16 = softmax(axis = var_108, x = aw_31_cast_fp16)[name = tensor<string, []>("op_259_cast_fp16")];
+            tensor<string, []> var_261_equation_0 = const()[name = tensor<string, []>("op_261_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_261_cast_fp16 = einsum(equation = var_261_equation_0, values = (var_195_cast_fp16_0, var_244_cast_fp16))[name = tensor<string, []>("op_261_cast_fp16")];
+            tensor<string, []> var_263_equation_0 = const()[name = tensor<string, []>("op_263_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_263_cast_fp16 = einsum(equation = var_263_equation_0, values = (var_195_cast_fp16_1, var_245_cast_fp16))[name = tensor<string, []>("op_263_cast_fp16")];
+            tensor<string, []> var_265_equation_0 = const()[name = tensor<string, []>("op_265_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_265_cast_fp16 = einsum(equation = var_265_equation_0, values = (var_195_cast_fp16_2, var_246_cast_fp16))[name = tensor<string, []>("op_265_cast_fp16")];
+            tensor<string, []> var_267_equation_0 = const()[name = tensor<string, []>("op_267_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_267_cast_fp16 = einsum(equation = var_267_equation_0, values = (var_195_cast_fp16_3, var_247_cast_fp16))[name = tensor<string, []>("op_267_cast_fp16")];
+            tensor<string, []> var_269_equation_0 = const()[name = tensor<string, []>("op_269_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_269_cast_fp16 = einsum(equation = var_269_equation_0, values = (var_195_cast_fp16_4, var_248_cast_fp16))[name = tensor<string, []>("op_269_cast_fp16")];
+            tensor<string, []> var_271_equation_0 = const()[name = tensor<string, []>("op_271_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_271_cast_fp16 = einsum(equation = var_271_equation_0, values = (var_195_cast_fp16_5, var_249_cast_fp16))[name = tensor<string, []>("op_271_cast_fp16")];
+            tensor<string, []> var_273_equation_0 = const()[name = tensor<string, []>("op_273_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_273_cast_fp16 = einsum(equation = var_273_equation_0, values = (var_195_cast_fp16_6, var_250_cast_fp16))[name = tensor<string, []>("op_273_cast_fp16")];
+            tensor<string, []> var_275_equation_0 = const()[name = tensor<string, []>("op_275_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_275_cast_fp16 = einsum(equation = var_275_equation_0, values = (var_195_cast_fp16_7, var_251_cast_fp16))[name = tensor<string, []>("op_275_cast_fp16")];
+            tensor<string, []> var_277_equation_0 = const()[name = tensor<string, []>("op_277_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16 = einsum(equation = var_277_equation_0, values = (var_195_cast_fp16_8, var_252_cast_fp16))[name = tensor<string, []>("op_277_cast_fp16")];
+            tensor<string, []> var_279_equation_0 = const()[name = tensor<string, []>("op_279_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_279_cast_fp16 = einsum(equation = var_279_equation_0, values = (var_195_cast_fp16_9, var_253_cast_fp16))[name = tensor<string, []>("op_279_cast_fp16")];
+            tensor<string, []> var_281_equation_0 = const()[name = tensor<string, []>("op_281_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_281_cast_fp16 = einsum(equation = var_281_equation_0, values = (var_195_cast_fp16_10, var_254_cast_fp16))[name = tensor<string, []>("op_281_cast_fp16")];
+            tensor<string, []> var_283_equation_0 = const()[name = tensor<string, []>("op_283_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_283_cast_fp16 = einsum(equation = var_283_equation_0, values = (var_195_cast_fp16_11, var_255_cast_fp16))[name = tensor<string, []>("op_283_cast_fp16")];
+            tensor<string, []> var_285_equation_0 = const()[name = tensor<string, []>("op_285_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_285_cast_fp16 = einsum(equation = var_285_equation_0, values = (var_195_cast_fp16_12, var_256_cast_fp16))[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<string, []> var_287_equation_0 = const()[name = tensor<string, []>("op_287_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_287_cast_fp16 = einsum(equation = var_287_equation_0, values = (var_195_cast_fp16_13, var_257_cast_fp16))[name = tensor<string, []>("op_287_cast_fp16")];
+            tensor<string, []> var_289_equation_0 = const()[name = tensor<string, []>("op_289_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_289_cast_fp16 = einsum(equation = var_289_equation_0, values = (var_195_cast_fp16_14, var_258_cast_fp16))[name = tensor<string, []>("op_289_cast_fp16")];
+            tensor<string, []> var_291_equation_0 = const()[name = tensor<string, []>("op_291_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_291_cast_fp16 = einsum(equation = var_291_equation_0, values = (var_195_cast_fp16_15, var_259_cast_fp16))[name = tensor<string, []>("op_291_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_5_cast_fp16 = concat(axis = var_108, interleave = input_5_interleave_0, values = (var_261_cast_fp16, var_263_cast_fp16, var_265_cast_fp16, var_267_cast_fp16, var_269_cast_fp16, var_271_cast_fp16, var_273_cast_fp16, var_275_cast_fp16, var_277_cast_fp16, var_279_cast_fp16, var_281_cast_fp16, var_283_cast_fp16, var_285_cast_fp16, var_287_cast_fp16, var_289_cast_fp16, var_291_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_300_pad_type_0 = const()[name = tensor<string, []>("op_300_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_300_strides_0 = const()[name = tensor<string, []>("op_300_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_300_pad_0 = const()[name = tensor<string, []>("op_300_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_300_dilations_0 = const()[name = tensor<string, []>("op_300_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_300_groups_0 = const()[name = tensor<string, []>("op_300_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16159552)))];
+            tensor<fp16, [1024]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18256768)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_300_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_300_dilations_0, groups = var_300_groups_0, pad = var_300_pad_0, pad_type = var_300_pad_type_0, strides = var_300_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_300_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_300_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18258880)))];
+            tensor<fp16, [1024]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18260992)))];
+            tensor<fp16, []> var_310_to_fp16 = const()[name = tensor<string, []>("op_310_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_310_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18263104)))];
+            tensor<fp16, [4096]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26651776)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_336_pad_type_0 = const()[name = tensor<string, []>("op_336_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_336_strides_0 = const()[name = tensor<string, []>("op_336_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_336_pad_0 = const()[name = tensor<string, []>("op_336_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_336_dilations_0 = const()[name = tensor<string, []>("op_336_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_336_groups_0 = const()[name = tensor<string, []>("op_336_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26660032)))];
+            tensor<fp16, [1024]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35048704)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_336_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_336_dilations_0, groups = var_336_groups_0, pad = var_336_pad_0, pad_type = var_336_pad_type_0, strides = var_336_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_345 = const()[name = tensor<string, []>("op_345"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35050816)))];
+            tensor<fp16, [1024]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35052928)))];
+            tensor<fp16, []> var_361_to_fp16 = const()[name = tensor<string, []>("op_361_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_361_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_396_weight_0_to_fp16 = const()[name = tensor<string, []>("op_396_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35055040)))];
+            tensor<fp16, [1024]> var_396_bias_0_to_fp16 = const()[name = tensor<string, []>("op_396_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37152256)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_396_cast_fp16 = conv(bias = var_396_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_396_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_396_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(37154368)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_394_pad_type_0 = const()[name = tensor<string, []>("op_394_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_394_strides_0 = const()[name = tensor<string, []>("op_394_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_394_pad_0 = const()[name = tensor<string, []>("op_394_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_394_dilations_0 = const()[name = tensor<string, []>("op_394_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_394_groups_0 = const()[name = tensor<string, []>("op_394_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39251584)))];
+            tensor<fp16, [1024]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41348800)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_394_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_394_dilations_0, groups = var_394_groups_0, pad = var_394_pad_0, pad_type = var_394_pad_type_0, strides = var_394_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_394_cast_fp16")];
+            tensor<int32, [16]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_397_axis_0 = const()[name = tensor<string, []>("op_397_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_397_cast_fp16_15 = split(axis = var_397_axis_0, split_sizes = tile_3, x = var_396_cast_fp16)[name = tensor<string, []>("op_397_cast_fp16")];
+            tensor<int32, [4]> var_414_perm_0 = const()[name = tensor<string, []>("op_414_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_415_axis_0 = const()[name = tensor<string, []>("op_415_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_414_cast_fp16 = transpose(perm = var_414_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_23")];
+            tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_415_cast_fp16_15 = split(axis = var_415_axis_0, split_sizes = tile_4, x = var_414_cast_fp16)[name = tensor<string, []>("op_415_cast_fp16")];
+            tensor<int32, [16]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_432_axis_0 = const()[name = tensor<string, []>("op_432_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16_15 = split(axis = var_432_axis_0, split_sizes = tile_5, x = var_394_cast_fp16)[name = tensor<string, []>("op_432_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_415_cast_fp16_0, var_397_cast_fp16_0))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_415_cast_fp16_1, var_397_cast_fp16_1))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_415_cast_fp16_2, var_397_cast_fp16_2))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_415_cast_fp16_3, var_397_cast_fp16_3))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_415_cast_fp16_4, var_397_cast_fp16_4))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_415_cast_fp16_5, var_397_cast_fp16_5))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_415_cast_fp16_6, var_397_cast_fp16_6))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_415_cast_fp16_7, var_397_cast_fp16_7))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_415_cast_fp16_8, var_397_cast_fp16_8))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_415_cast_fp16_9, var_397_cast_fp16_9))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_415_cast_fp16_10, var_397_cast_fp16_10))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_415_cast_fp16_11, var_397_cast_fp16_11))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_415_cast_fp16_12, var_397_cast_fp16_12))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_415_cast_fp16_13, var_397_cast_fp16_13))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_415_cast_fp16_14, var_397_cast_fp16_14))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_415_cast_fp16_15, var_397_cast_fp16_15))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_481_cast_fp16 = softmax(axis = var_345, x = aw_33_cast_fp16)[name = tensor<string, []>("op_481_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_482_cast_fp16 = softmax(axis = var_345, x = aw_35_cast_fp16)[name = tensor<string, []>("op_482_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_483_cast_fp16 = softmax(axis = var_345, x = aw_37_cast_fp16)[name = tensor<string, []>("op_483_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_484_cast_fp16 = softmax(axis = var_345, x = aw_39_cast_fp16)[name = tensor<string, []>("op_484_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_485_cast_fp16 = softmax(axis = var_345, x = aw_41_cast_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_486_cast_fp16 = softmax(axis = var_345, x = aw_43_cast_fp16)[name = tensor<string, []>("op_486_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_487_cast_fp16 = softmax(axis = var_345, x = aw_45_cast_fp16)[name = tensor<string, []>("op_487_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_488_cast_fp16 = softmax(axis = var_345, x = aw_47_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_489_cast_fp16 = softmax(axis = var_345, x = aw_49_cast_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_490_cast_fp16 = softmax(axis = var_345, x = aw_51_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_491_cast_fp16 = softmax(axis = var_345, x = aw_53_cast_fp16)[name = tensor<string, []>("op_491_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_492_cast_fp16 = softmax(axis = var_345, x = aw_55_cast_fp16)[name = tensor<string, []>("op_492_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_493_cast_fp16 = softmax(axis = var_345, x = aw_57_cast_fp16)[name = tensor<string, []>("op_493_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_494_cast_fp16 = softmax(axis = var_345, x = aw_59_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_495_cast_fp16 = softmax(axis = var_345, x = aw_61_cast_fp16)[name = tensor<string, []>("op_495_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_496_cast_fp16 = softmax(axis = var_345, x = aw_63_cast_fp16)[name = tensor<string, []>("op_496_cast_fp16")];
+            tensor<string, []> var_498_equation_0 = const()[name = tensor<string, []>("op_498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_432_cast_fp16_0, var_481_cast_fp16))[name = tensor<string, []>("op_498_cast_fp16")];
+            tensor<string, []> var_500_equation_0 = const()[name = tensor<string, []>("op_500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_432_cast_fp16_1, var_482_cast_fp16))[name = tensor<string, []>("op_500_cast_fp16")];
+            tensor<string, []> var_502_equation_0 = const()[name = tensor<string, []>("op_502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_432_cast_fp16_2, var_483_cast_fp16))[name = tensor<string, []>("op_502_cast_fp16")];
+            tensor<string, []> var_504_equation_0 = const()[name = tensor<string, []>("op_504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_432_cast_fp16_3, var_484_cast_fp16))[name = tensor<string, []>("op_504_cast_fp16")];
+            tensor<string, []> var_506_equation_0 = const()[name = tensor<string, []>("op_506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_432_cast_fp16_4, var_485_cast_fp16))[name = tensor<string, []>("op_506_cast_fp16")];
+            tensor<string, []> var_508_equation_0 = const()[name = tensor<string, []>("op_508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_432_cast_fp16_5, var_486_cast_fp16))[name = tensor<string, []>("op_508_cast_fp16")];
+            tensor<string, []> var_510_equation_0 = const()[name = tensor<string, []>("op_510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_432_cast_fp16_6, var_487_cast_fp16))[name = tensor<string, []>("op_510_cast_fp16")];
+            tensor<string, []> var_512_equation_0 = const()[name = tensor<string, []>("op_512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_432_cast_fp16_7, var_488_cast_fp16))[name = tensor<string, []>("op_512_cast_fp16")];
+            tensor<string, []> var_514_equation_0 = const()[name = tensor<string, []>("op_514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_514_cast_fp16 = einsum(equation = var_514_equation_0, values = (var_432_cast_fp16_8, var_489_cast_fp16))[name = tensor<string, []>("op_514_cast_fp16")];
+            tensor<string, []> var_516_equation_0 = const()[name = tensor<string, []>("op_516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_432_cast_fp16_9, var_490_cast_fp16))[name = tensor<string, []>("op_516_cast_fp16")];
+            tensor<string, []> var_518_equation_0 = const()[name = tensor<string, []>("op_518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_518_cast_fp16 = einsum(equation = var_518_equation_0, values = (var_432_cast_fp16_10, var_491_cast_fp16))[name = tensor<string, []>("op_518_cast_fp16")];
+            tensor<string, []> var_520_equation_0 = const()[name = tensor<string, []>("op_520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_520_cast_fp16 = einsum(equation = var_520_equation_0, values = (var_432_cast_fp16_11, var_492_cast_fp16))[name = tensor<string, []>("op_520_cast_fp16")];
+            tensor<string, []> var_522_equation_0 = const()[name = tensor<string, []>("op_522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_522_cast_fp16 = einsum(equation = var_522_equation_0, values = (var_432_cast_fp16_12, var_493_cast_fp16))[name = tensor<string, []>("op_522_cast_fp16")];
+            tensor<string, []> var_524_equation_0 = const()[name = tensor<string, []>("op_524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_524_cast_fp16 = einsum(equation = var_524_equation_0, values = (var_432_cast_fp16_13, var_494_cast_fp16))[name = tensor<string, []>("op_524_cast_fp16")];
+            tensor<string, []> var_526_equation_0 = const()[name = tensor<string, []>("op_526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_526_cast_fp16 = einsum(equation = var_526_equation_0, values = (var_432_cast_fp16_14, var_495_cast_fp16))[name = tensor<string, []>("op_526_cast_fp16")];
+            tensor<string, []> var_528_equation_0 = const()[name = tensor<string, []>("op_528_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_528_cast_fp16 = einsum(equation = var_528_equation_0, values = (var_432_cast_fp16_15, var_496_cast_fp16))[name = tensor<string, []>("op_528_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_15_cast_fp16 = concat(axis = var_345, interleave = input_15_interleave_0, values = (var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16, var_514_cast_fp16, var_516_cast_fp16, var_518_cast_fp16, var_520_cast_fp16, var_522_cast_fp16, var_524_cast_fp16, var_526_cast_fp16, var_528_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_537_pad_type_0 = const()[name = tensor<string, []>("op_537_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_537_strides_0 = const()[name = tensor<string, []>("op_537_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_537_pad_0 = const()[name = tensor<string, []>("op_537_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_537_dilations_0 = const()[name = tensor<string, []>("op_537_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_537_groups_0 = const()[name = tensor<string, []>("op_537_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(41350912)))];
+            tensor<fp16, [1024]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43448128)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_537_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_537_dilations_0, groups = var_537_groups_0, pad = var_537_pad_0, pad_type = var_537_pad_type_0, strides = var_537_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_537_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_537_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43450240)))];
+            tensor<fp16, [1024]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43452352)))];
+            tensor<fp16, []> var_547_to_fp16 = const()[name = tensor<string, []>("op_547_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_547_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43454464)))];
+            tensor<fp16, [4096]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51843136)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_573_pad_type_0 = const()[name = tensor<string, []>("op_573_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_573_strides_0 = const()[name = tensor<string, []>("op_573_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_573_pad_0 = const()[name = tensor<string, []>("op_573_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_573_dilations_0 = const()[name = tensor<string, []>("op_573_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_573_groups_0 = const()[name = tensor<string, []>("op_573_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51851392)))];
+            tensor<fp16, [1024]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60240064)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_573_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_573_dilations_0, groups = var_573_groups_0, pad = var_573_pad_0, pad_type = var_573_pad_type_0, strides = var_573_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_573_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_582 = const()[name = tensor<string, []>("op_582"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60242176)))];
+            tensor<fp16, [1024]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60244288)))];
+            tensor<fp16, []> var_598_to_fp16 = const()[name = tensor<string, []>("op_598_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_598_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_633_weight_0_to_fp16 = const()[name = tensor<string, []>("op_633_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60246400)))];
+            tensor<fp16, [1024]> var_633_bias_0_to_fp16 = const()[name = tensor<string, []>("op_633_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62343616)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_633_cast_fp16 = conv(bias = var_633_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_633_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_633_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62345728)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_631_pad_type_0 = const()[name = tensor<string, []>("op_631_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_631_strides_0 = const()[name = tensor<string, []>("op_631_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_631_pad_0 = const()[name = tensor<string, []>("op_631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_631_dilations_0 = const()[name = tensor<string, []>("op_631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_631_groups_0 = const()[name = tensor<string, []>("op_631_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64442944)))];
+            tensor<fp16, [1024]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66540160)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_631_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_631_cast_fp16")];
+            tensor<int32, [16]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_634_axis_0 = const()[name = tensor<string, []>("op_634_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16_15 = split(axis = var_634_axis_0, split_sizes = tile_6, x = var_633_cast_fp16)[name = tensor<string, []>("op_634_cast_fp16")];
+            tensor<int32, [4]> var_651_perm_0 = const()[name = tensor<string, []>("op_651_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_652_axis_0 = const()[name = tensor<string, []>("op_652_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_651_cast_fp16 = transpose(perm = var_651_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_22")];
+            tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_652_cast_fp16_15 = split(axis = var_652_axis_0, split_sizes = tile_7, x = var_651_cast_fp16)[name = tensor<string, []>("op_652_cast_fp16")];
+            tensor<int32, [16]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_669_axis_0 = const()[name = tensor<string, []>("op_669_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_669_cast_fp16_15 = split(axis = var_669_axis_0, split_sizes = tile_8, x = var_631_cast_fp16)[name = tensor<string, []>("op_669_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_652_cast_fp16_0, var_634_cast_fp16_0))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_652_cast_fp16_1, var_634_cast_fp16_1))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_652_cast_fp16_2, var_634_cast_fp16_2))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_652_cast_fp16_3, var_634_cast_fp16_3))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_652_cast_fp16_4, var_634_cast_fp16_4))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_652_cast_fp16_5, var_634_cast_fp16_5))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_652_cast_fp16_6, var_634_cast_fp16_6))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_652_cast_fp16_7, var_634_cast_fp16_7))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_652_cast_fp16_8, var_634_cast_fp16_8))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_652_cast_fp16_9, var_634_cast_fp16_9))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_652_cast_fp16_10, var_634_cast_fp16_10))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_652_cast_fp16_11, var_634_cast_fp16_11))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_652_cast_fp16_12, var_634_cast_fp16_12))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_652_cast_fp16_13, var_634_cast_fp16_13))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_652_cast_fp16_14, var_634_cast_fp16_14))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_652_cast_fp16_15, var_634_cast_fp16_15))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_718_cast_fp16 = softmax(axis = var_582, x = aw_65_cast_fp16)[name = tensor<string, []>("op_718_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_719_cast_fp16 = softmax(axis = var_582, x = aw_67_cast_fp16)[name = tensor<string, []>("op_719_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_720_cast_fp16 = softmax(axis = var_582, x = aw_69_cast_fp16)[name = tensor<string, []>("op_720_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_721_cast_fp16 = softmax(axis = var_582, x = aw_71_cast_fp16)[name = tensor<string, []>("op_721_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_722_cast_fp16 = softmax(axis = var_582, x = aw_73_cast_fp16)[name = tensor<string, []>("op_722_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_723_cast_fp16 = softmax(axis = var_582, x = aw_75_cast_fp16)[name = tensor<string, []>("op_723_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_724_cast_fp16 = softmax(axis = var_582, x = aw_77_cast_fp16)[name = tensor<string, []>("op_724_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_725_cast_fp16 = softmax(axis = var_582, x = aw_79_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_726_cast_fp16 = softmax(axis = var_582, x = aw_81_cast_fp16)[name = tensor<string, []>("op_726_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_727_cast_fp16 = softmax(axis = var_582, x = aw_83_cast_fp16)[name = tensor<string, []>("op_727_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_728_cast_fp16 = softmax(axis = var_582, x = aw_85_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_729_cast_fp16 = softmax(axis = var_582, x = aw_87_cast_fp16)[name = tensor<string, []>("op_729_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_730_cast_fp16 = softmax(axis = var_582, x = aw_89_cast_fp16)[name = tensor<string, []>("op_730_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_731_cast_fp16 = softmax(axis = var_582, x = aw_91_cast_fp16)[name = tensor<string, []>("op_731_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_732_cast_fp16 = softmax(axis = var_582, x = aw_93_cast_fp16)[name = tensor<string, []>("op_732_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_733_cast_fp16 = softmax(axis = var_582, x = aw_95_cast_fp16)[name = tensor<string, []>("op_733_cast_fp16")];
+            tensor<string, []> var_735_equation_0 = const()[name = tensor<string, []>("op_735_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_735_cast_fp16 = einsum(equation = var_735_equation_0, values = (var_669_cast_fp16_0, var_718_cast_fp16))[name = tensor<string, []>("op_735_cast_fp16")];
+            tensor<string, []> var_737_equation_0 = const()[name = tensor<string, []>("op_737_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_737_cast_fp16 = einsum(equation = var_737_equation_0, values = (var_669_cast_fp16_1, var_719_cast_fp16))[name = tensor<string, []>("op_737_cast_fp16")];
+            tensor<string, []> var_739_equation_0 = const()[name = tensor<string, []>("op_739_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_739_cast_fp16 = einsum(equation = var_739_equation_0, values = (var_669_cast_fp16_2, var_720_cast_fp16))[name = tensor<string, []>("op_739_cast_fp16")];
+            tensor<string, []> var_741_equation_0 = const()[name = tensor<string, []>("op_741_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_741_cast_fp16 = einsum(equation = var_741_equation_0, values = (var_669_cast_fp16_3, var_721_cast_fp16))[name = tensor<string, []>("op_741_cast_fp16")];
+            tensor<string, []> var_743_equation_0 = const()[name = tensor<string, []>("op_743_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_743_cast_fp16 = einsum(equation = var_743_equation_0, values = (var_669_cast_fp16_4, var_722_cast_fp16))[name = tensor<string, []>("op_743_cast_fp16")];
+            tensor<string, []> var_745_equation_0 = const()[name = tensor<string, []>("op_745_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_745_cast_fp16 = einsum(equation = var_745_equation_0, values = (var_669_cast_fp16_5, var_723_cast_fp16))[name = tensor<string, []>("op_745_cast_fp16")];
+            tensor<string, []> var_747_equation_0 = const()[name = tensor<string, []>("op_747_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_747_cast_fp16 = einsum(equation = var_747_equation_0, values = (var_669_cast_fp16_6, var_724_cast_fp16))[name = tensor<string, []>("op_747_cast_fp16")];
+            tensor<string, []> var_749_equation_0 = const()[name = tensor<string, []>("op_749_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_749_cast_fp16 = einsum(equation = var_749_equation_0, values = (var_669_cast_fp16_7, var_725_cast_fp16))[name = tensor<string, []>("op_749_cast_fp16")];
+            tensor<string, []> var_751_equation_0 = const()[name = tensor<string, []>("op_751_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16 = einsum(equation = var_751_equation_0, values = (var_669_cast_fp16_8, var_726_cast_fp16))[name = tensor<string, []>("op_751_cast_fp16")];
+            tensor<string, []> var_753_equation_0 = const()[name = tensor<string, []>("op_753_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_753_cast_fp16 = einsum(equation = var_753_equation_0, values = (var_669_cast_fp16_9, var_727_cast_fp16))[name = tensor<string, []>("op_753_cast_fp16")];
+            tensor<string, []> var_755_equation_0 = const()[name = tensor<string, []>("op_755_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_755_cast_fp16 = einsum(equation = var_755_equation_0, values = (var_669_cast_fp16_10, var_728_cast_fp16))[name = tensor<string, []>("op_755_cast_fp16")];
+            tensor<string, []> var_757_equation_0 = const()[name = tensor<string, []>("op_757_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_757_cast_fp16 = einsum(equation = var_757_equation_0, values = (var_669_cast_fp16_11, var_729_cast_fp16))[name = tensor<string, []>("op_757_cast_fp16")];
+            tensor<string, []> var_759_equation_0 = const()[name = tensor<string, []>("op_759_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_759_cast_fp16 = einsum(equation = var_759_equation_0, values = (var_669_cast_fp16_12, var_730_cast_fp16))[name = tensor<string, []>("op_759_cast_fp16")];
+            tensor<string, []> var_761_equation_0 = const()[name = tensor<string, []>("op_761_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_761_cast_fp16 = einsum(equation = var_761_equation_0, values = (var_669_cast_fp16_13, var_731_cast_fp16))[name = tensor<string, []>("op_761_cast_fp16")];
+            tensor<string, []> var_763_equation_0 = const()[name = tensor<string, []>("op_763_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_763_cast_fp16 = einsum(equation = var_763_equation_0, values = (var_669_cast_fp16_14, var_732_cast_fp16))[name = tensor<string, []>("op_763_cast_fp16")];
+            tensor<string, []> var_765_equation_0 = const()[name = tensor<string, []>("op_765_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_765_cast_fp16 = einsum(equation = var_765_equation_0, values = (var_669_cast_fp16_15, var_733_cast_fp16))[name = tensor<string, []>("op_765_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_25_cast_fp16 = concat(axis = var_582, interleave = input_25_interleave_0, values = (var_735_cast_fp16, var_737_cast_fp16, var_739_cast_fp16, var_741_cast_fp16, var_743_cast_fp16, var_745_cast_fp16, var_747_cast_fp16, var_749_cast_fp16, var_751_cast_fp16, var_753_cast_fp16, var_755_cast_fp16, var_757_cast_fp16, var_759_cast_fp16, var_761_cast_fp16, var_763_cast_fp16, var_765_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_774_pad_type_0 = const()[name = tensor<string, []>("op_774_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_774_strides_0 = const()[name = tensor<string, []>("op_774_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_774_pad_0 = const()[name = tensor<string, []>("op_774_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_774_dilations_0 = const()[name = tensor<string, []>("op_774_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_774_groups_0 = const()[name = tensor<string, []>("op_774_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66542272)))];
+            tensor<fp16, [1024]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68639488)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_774_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_774_dilations_0, groups = var_774_groups_0, pad = var_774_pad_0, pad_type = var_774_pad_type_0, strides = var_774_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_774_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_774_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68641600)))];
+            tensor<fp16, [1024]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68643712)))];
+            tensor<fp16, []> var_784_to_fp16 = const()[name = tensor<string, []>("op_784_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_784_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68645824)))];
+            tensor<fp16, [4096]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77034496)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_810_pad_type_0 = const()[name = tensor<string, []>("op_810_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_810_strides_0 = const()[name = tensor<string, []>("op_810_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_810_pad_0 = const()[name = tensor<string, []>("op_810_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_810_dilations_0 = const()[name = tensor<string, []>("op_810_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_810_groups_0 = const()[name = tensor<string, []>("op_810_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77042752)))];
+            tensor<fp16, [1024]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85431424)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_810_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_810_dilations_0, groups = var_810_groups_0, pad = var_810_pad_0, pad_type = var_810_pad_type_0, strides = var_810_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_810_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_810_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_819 = const()[name = tensor<string, []>("op_819"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85433536)))];
+            tensor<fp16, [1024]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85435648)))];
+            tensor<fp16, []> var_835_to_fp16 = const()[name = tensor<string, []>("op_835_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_835_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_870_weight_0_to_fp16 = const()[name = tensor<string, []>("op_870_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85437760)))];
+            tensor<fp16, [1024]> var_870_bias_0_to_fp16 = const()[name = tensor<string, []>("op_870_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87534976)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_870_cast_fp16 = conv(bias = var_870_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_870_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_870_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(87537088)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_868_pad_type_0 = const()[name = tensor<string, []>("op_868_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_868_strides_0 = const()[name = tensor<string, []>("op_868_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_868_pad_0 = const()[name = tensor<string, []>("op_868_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_868_dilations_0 = const()[name = tensor<string, []>("op_868_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_868_groups_0 = const()[name = tensor<string, []>("op_868_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89634304)))];
+            tensor<fp16, [1024]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91731520)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_868_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_868_dilations_0, groups = var_868_groups_0, pad = var_868_pad_0, pad_type = var_868_pad_type_0, strides = var_868_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_868_cast_fp16")];
+            tensor<int32, [16]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_871_axis_0 = const()[name = tensor<string, []>("op_871_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_871_cast_fp16_15 = split(axis = var_871_axis_0, split_sizes = tile_9, x = var_870_cast_fp16)[name = tensor<string, []>("op_871_cast_fp16")];
+            tensor<int32, [4]> var_888_perm_0 = const()[name = tensor<string, []>("op_888_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_889_axis_0 = const()[name = tensor<string, []>("op_889_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_888_cast_fp16 = transpose(perm = var_888_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_21")];
+            tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_889_cast_fp16_15 = split(axis = var_889_axis_0, split_sizes = tile_10, x = var_888_cast_fp16)[name = tensor<string, []>("op_889_cast_fp16")];
+            tensor<int32, [16]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_906_axis_0 = const()[name = tensor<string, []>("op_906_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_906_cast_fp16_15 = split(axis = var_906_axis_0, split_sizes = tile_11, x = var_868_cast_fp16)[name = tensor<string, []>("op_906_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_889_cast_fp16_0, var_871_cast_fp16_0))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_889_cast_fp16_1, var_871_cast_fp16_1))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_889_cast_fp16_2, var_871_cast_fp16_2))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_889_cast_fp16_3, var_871_cast_fp16_3))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_889_cast_fp16_4, var_871_cast_fp16_4))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_889_cast_fp16_5, var_871_cast_fp16_5))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_889_cast_fp16_6, var_871_cast_fp16_6))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_889_cast_fp16_7, var_871_cast_fp16_7))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_889_cast_fp16_8, var_871_cast_fp16_8))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_889_cast_fp16_9, var_871_cast_fp16_9))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_889_cast_fp16_10, var_871_cast_fp16_10))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_889_cast_fp16_11, var_871_cast_fp16_11))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_889_cast_fp16_12, var_871_cast_fp16_12))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_889_cast_fp16_13, var_871_cast_fp16_13))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_889_cast_fp16_14, var_871_cast_fp16_14))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_889_cast_fp16_15, var_871_cast_fp16_15))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_955_cast_fp16 = softmax(axis = var_819, x = aw_97_cast_fp16)[name = tensor<string, []>("op_955_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_956_cast_fp16 = softmax(axis = var_819, x = aw_99_cast_fp16)[name = tensor<string, []>("op_956_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_957_cast_fp16 = softmax(axis = var_819, x = aw_101_cast_fp16)[name = tensor<string, []>("op_957_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_958_cast_fp16 = softmax(axis = var_819, x = aw_103_cast_fp16)[name = tensor<string, []>("op_958_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_959_cast_fp16 = softmax(axis = var_819, x = aw_105_cast_fp16)[name = tensor<string, []>("op_959_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_960_cast_fp16 = softmax(axis = var_819, x = aw_107_cast_fp16)[name = tensor<string, []>("op_960_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_961_cast_fp16 = softmax(axis = var_819, x = aw_109_cast_fp16)[name = tensor<string, []>("op_961_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_962_cast_fp16 = softmax(axis = var_819, x = aw_111_cast_fp16)[name = tensor<string, []>("op_962_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_963_cast_fp16 = softmax(axis = var_819, x = aw_113_cast_fp16)[name = tensor<string, []>("op_963_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_964_cast_fp16 = softmax(axis = var_819, x = aw_115_cast_fp16)[name = tensor<string, []>("op_964_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_965_cast_fp16 = softmax(axis = var_819, x = aw_117_cast_fp16)[name = tensor<string, []>("op_965_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_966_cast_fp16 = softmax(axis = var_819, x = aw_119_cast_fp16)[name = tensor<string, []>("op_966_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_967_cast_fp16 = softmax(axis = var_819, x = aw_121_cast_fp16)[name = tensor<string, []>("op_967_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_968_cast_fp16 = softmax(axis = var_819, x = aw_123_cast_fp16)[name = tensor<string, []>("op_968_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_969_cast_fp16 = softmax(axis = var_819, x = aw_125_cast_fp16)[name = tensor<string, []>("op_969_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_970_cast_fp16 = softmax(axis = var_819, x = aw_127_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<string, []> var_972_equation_0 = const()[name = tensor<string, []>("op_972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_972_cast_fp16 = einsum(equation = var_972_equation_0, values = (var_906_cast_fp16_0, var_955_cast_fp16))[name = tensor<string, []>("op_972_cast_fp16")];
+            tensor<string, []> var_974_equation_0 = const()[name = tensor<string, []>("op_974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_974_cast_fp16 = einsum(equation = var_974_equation_0, values = (var_906_cast_fp16_1, var_956_cast_fp16))[name = tensor<string, []>("op_974_cast_fp16")];
+            tensor<string, []> var_976_equation_0 = const()[name = tensor<string, []>("op_976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_976_cast_fp16 = einsum(equation = var_976_equation_0, values = (var_906_cast_fp16_2, var_957_cast_fp16))[name = tensor<string, []>("op_976_cast_fp16")];
+            tensor<string, []> var_978_equation_0 = const()[name = tensor<string, []>("op_978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_978_cast_fp16 = einsum(equation = var_978_equation_0, values = (var_906_cast_fp16_3, var_958_cast_fp16))[name = tensor<string, []>("op_978_cast_fp16")];
+            tensor<string, []> var_980_equation_0 = const()[name = tensor<string, []>("op_980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_980_cast_fp16 = einsum(equation = var_980_equation_0, values = (var_906_cast_fp16_4, var_959_cast_fp16))[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<string, []> var_982_equation_0 = const()[name = tensor<string, []>("op_982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_906_cast_fp16_5, var_960_cast_fp16))[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> var_984_equation_0 = const()[name = tensor<string, []>("op_984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_906_cast_fp16_6, var_961_cast_fp16))[name = tensor<string, []>("op_984_cast_fp16")];
+            tensor<string, []> var_986_equation_0 = const()[name = tensor<string, []>("op_986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_906_cast_fp16_7, var_962_cast_fp16))[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<string, []> var_988_equation_0 = const()[name = tensor<string, []>("op_988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_906_cast_fp16_8, var_963_cast_fp16))[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<string, []> var_990_equation_0 = const()[name = tensor<string, []>("op_990_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_906_cast_fp16_9, var_964_cast_fp16))[name = tensor<string, []>("op_990_cast_fp16")];
+            tensor<string, []> var_992_equation_0 = const()[name = tensor<string, []>("op_992_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_906_cast_fp16_10, var_965_cast_fp16))[name = tensor<string, []>("op_992_cast_fp16")];
+            tensor<string, []> var_994_equation_0 = const()[name = tensor<string, []>("op_994_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_906_cast_fp16_11, var_966_cast_fp16))[name = tensor<string, []>("op_994_cast_fp16")];
+            tensor<string, []> var_996_equation_0 = const()[name = tensor<string, []>("op_996_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_906_cast_fp16_12, var_967_cast_fp16))[name = tensor<string, []>("op_996_cast_fp16")];
+            tensor<string, []> var_998_equation_0 = const()[name = tensor<string, []>("op_998_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_906_cast_fp16_13, var_968_cast_fp16))[name = tensor<string, []>("op_998_cast_fp16")];
+            tensor<string, []> var_1000_equation_0 = const()[name = tensor<string, []>("op_1000_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_906_cast_fp16_14, var_969_cast_fp16))[name = tensor<string, []>("op_1000_cast_fp16")];
+            tensor<string, []> var_1002_equation_0 = const()[name = tensor<string, []>("op_1002_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_906_cast_fp16_15, var_970_cast_fp16))[name = tensor<string, []>("op_1002_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_35_cast_fp16 = concat(axis = var_819, interleave = input_35_interleave_0, values = (var_972_cast_fp16, var_974_cast_fp16, var_976_cast_fp16, var_978_cast_fp16, var_980_cast_fp16, var_982_cast_fp16, var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16, var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_1011_pad_type_0 = const()[name = tensor<string, []>("op_1011_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1011_strides_0 = const()[name = tensor<string, []>("op_1011_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1011_pad_0 = const()[name = tensor<string, []>("op_1011_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1011_dilations_0 = const()[name = tensor<string, []>("op_1011_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1011_groups_0 = const()[name = tensor<string, []>("op_1011_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91733632)))];
+            tensor<fp16, [1024]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93830848)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1011_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_1011_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_1011_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93832960)))];
+            tensor<fp16, [1024]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93835072)))];
+            tensor<fp16, []> var_1021_to_fp16 = const()[name = tensor<string, []>("op_1021_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_1021_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93837184)))];
+            tensor<fp16, [4096]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102225856)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_1047_pad_type_0 = const()[name = tensor<string, []>("op_1047_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1047_strides_0 = const()[name = tensor<string, []>("op_1047_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1047_pad_0 = const()[name = tensor<string, []>("op_1047_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1047_dilations_0 = const()[name = tensor<string, []>("op_1047_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1047_groups_0 = const()[name = tensor<string, []>("op_1047_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(102234112)))];
+            tensor<fp16, [1024]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110622784)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1047_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_1047_dilations_0, groups = var_1047_groups_0, pad = var_1047_pad_0, pad_type = var_1047_pad_type_0, strides = var_1047_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_1047_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_1047_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_1056 = const()[name = tensor<string, []>("op_1056"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110624896)))];
+            tensor<fp16, [1024]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110627008)))];
+            tensor<fp16, []> var_1072_to_fp16 = const()[name = tensor<string, []>("op_1072_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_1072_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1107_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1107_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110629120)))];
+            tensor<fp16, [1024]> var_1107_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1107_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112726336)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1107_cast_fp16 = conv(bias = var_1107_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_1107_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1107_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(112728448)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_1105_pad_type_0 = const()[name = tensor<string, []>("op_1105_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1105_strides_0 = const()[name = tensor<string, []>("op_1105_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1105_pad_0 = const()[name = tensor<string, []>("op_1105_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1105_dilations_0 = const()[name = tensor<string, []>("op_1105_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1105_groups_0 = const()[name = tensor<string, []>("op_1105_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114825664)))];
+            tensor<fp16, [1024]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116922880)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1105_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_1105_dilations_0, groups = var_1105_groups_0, pad = var_1105_pad_0, pad_type = var_1105_pad_type_0, strides = var_1105_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_1105_cast_fp16")];
+            tensor<int32, [16]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1108_axis_0 = const()[name = tensor<string, []>("op_1108_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1108_cast_fp16_15 = split(axis = var_1108_axis_0, split_sizes = tile_12, x = var_1107_cast_fp16)[name = tensor<string, []>("op_1108_cast_fp16")];
+            tensor<int32, [4]> var_1125_perm_0 = const()[name = tensor<string, []>("op_1125_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1126_axis_0 = const()[name = tensor<string, []>("op_1126_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1125_cast_fp16 = transpose(perm = var_1125_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_20")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1126_cast_fp16_15 = split(axis = var_1126_axis_0, split_sizes = tile_13, x = var_1125_cast_fp16)[name = tensor<string, []>("op_1126_cast_fp16")];
+            tensor<int32, [16]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1143_axis_0 = const()[name = tensor<string, []>("op_1143_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1143_cast_fp16_15 = split(axis = var_1143_axis_0, split_sizes = tile_14, x = var_1105_cast_fp16)[name = tensor<string, []>("op_1143_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1126_cast_fp16_0, var_1108_cast_fp16_0))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1126_cast_fp16_1, var_1108_cast_fp16_1))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1126_cast_fp16_2, var_1108_cast_fp16_2))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1126_cast_fp16_3, var_1108_cast_fp16_3))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1126_cast_fp16_4, var_1108_cast_fp16_4))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1126_cast_fp16_5, var_1108_cast_fp16_5))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1126_cast_fp16_6, var_1108_cast_fp16_6))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1126_cast_fp16_7, var_1108_cast_fp16_7))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1126_cast_fp16_8, var_1108_cast_fp16_8))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1126_cast_fp16_9, var_1108_cast_fp16_9))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1126_cast_fp16_10, var_1108_cast_fp16_10))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1126_cast_fp16_11, var_1108_cast_fp16_11))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1126_cast_fp16_12, var_1108_cast_fp16_12))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1126_cast_fp16_13, var_1108_cast_fp16_13))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1126_cast_fp16_14, var_1108_cast_fp16_14))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1126_cast_fp16_15, var_1108_cast_fp16_15))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1192_cast_fp16 = softmax(axis = var_1056, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1192_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1193_cast_fp16 = softmax(axis = var_1056, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1193_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1194_cast_fp16 = softmax(axis = var_1056, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1194_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1195_cast_fp16 = softmax(axis = var_1056, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1195_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1196_cast_fp16 = softmax(axis = var_1056, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1196_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1197_cast_fp16 = softmax(axis = var_1056, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1197_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1198_cast_fp16 = softmax(axis = var_1056, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1198_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1199_cast_fp16 = softmax(axis = var_1056, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1199_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1200_cast_fp16 = softmax(axis = var_1056, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1201_cast_fp16 = softmax(axis = var_1056, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1202_cast_fp16 = softmax(axis = var_1056, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1203_cast_fp16 = softmax(axis = var_1056, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1204_cast_fp16 = softmax(axis = var_1056, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1205_cast_fp16 = softmax(axis = var_1056, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1206_cast_fp16 = softmax(axis = var_1056, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1207_cast_fp16 = softmax(axis = var_1056, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1207_cast_fp16")];
+            tensor<string, []> var_1209_equation_0 = const()[name = tensor<string, []>("op_1209_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1209_cast_fp16 = einsum(equation = var_1209_equation_0, values = (var_1143_cast_fp16_0, var_1192_cast_fp16))[name = tensor<string, []>("op_1209_cast_fp16")];
+            tensor<string, []> var_1211_equation_0 = const()[name = tensor<string, []>("op_1211_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1211_cast_fp16 = einsum(equation = var_1211_equation_0, values = (var_1143_cast_fp16_1, var_1193_cast_fp16))[name = tensor<string, []>("op_1211_cast_fp16")];
+            tensor<string, []> var_1213_equation_0 = const()[name = tensor<string, []>("op_1213_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1213_cast_fp16 = einsum(equation = var_1213_equation_0, values = (var_1143_cast_fp16_2, var_1194_cast_fp16))[name = tensor<string, []>("op_1213_cast_fp16")];
+            tensor<string, []> var_1215_equation_0 = const()[name = tensor<string, []>("op_1215_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1215_cast_fp16 = einsum(equation = var_1215_equation_0, values = (var_1143_cast_fp16_3, var_1195_cast_fp16))[name = tensor<string, []>("op_1215_cast_fp16")];
+            tensor<string, []> var_1217_equation_0 = const()[name = tensor<string, []>("op_1217_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1217_cast_fp16 = einsum(equation = var_1217_equation_0, values = (var_1143_cast_fp16_4, var_1196_cast_fp16))[name = tensor<string, []>("op_1217_cast_fp16")];
+            tensor<string, []> var_1219_equation_0 = const()[name = tensor<string, []>("op_1219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1219_cast_fp16 = einsum(equation = var_1219_equation_0, values = (var_1143_cast_fp16_5, var_1197_cast_fp16))[name = tensor<string, []>("op_1219_cast_fp16")];
+            tensor<string, []> var_1221_equation_0 = const()[name = tensor<string, []>("op_1221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1221_cast_fp16 = einsum(equation = var_1221_equation_0, values = (var_1143_cast_fp16_6, var_1198_cast_fp16))[name = tensor<string, []>("op_1221_cast_fp16")];
+            tensor<string, []> var_1223_equation_0 = const()[name = tensor<string, []>("op_1223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1223_cast_fp16 = einsum(equation = var_1223_equation_0, values = (var_1143_cast_fp16_7, var_1199_cast_fp16))[name = tensor<string, []>("op_1223_cast_fp16")];
+            tensor<string, []> var_1225_equation_0 = const()[name = tensor<string, []>("op_1225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1225_cast_fp16 = einsum(equation = var_1225_equation_0, values = (var_1143_cast_fp16_8, var_1200_cast_fp16))[name = tensor<string, []>("op_1225_cast_fp16")];
+            tensor<string, []> var_1227_equation_0 = const()[name = tensor<string, []>("op_1227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1227_cast_fp16 = einsum(equation = var_1227_equation_0, values = (var_1143_cast_fp16_9, var_1201_cast_fp16))[name = tensor<string, []>("op_1227_cast_fp16")];
+            tensor<string, []> var_1229_equation_0 = const()[name = tensor<string, []>("op_1229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1229_cast_fp16 = einsum(equation = var_1229_equation_0, values = (var_1143_cast_fp16_10, var_1202_cast_fp16))[name = tensor<string, []>("op_1229_cast_fp16")];
+            tensor<string, []> var_1231_equation_0 = const()[name = tensor<string, []>("op_1231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1231_cast_fp16 = einsum(equation = var_1231_equation_0, values = (var_1143_cast_fp16_11, var_1203_cast_fp16))[name = tensor<string, []>("op_1231_cast_fp16")];
+            tensor<string, []> var_1233_equation_0 = const()[name = tensor<string, []>("op_1233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1233_cast_fp16 = einsum(equation = var_1233_equation_0, values = (var_1143_cast_fp16_12, var_1204_cast_fp16))[name = tensor<string, []>("op_1233_cast_fp16")];
+            tensor<string, []> var_1235_equation_0 = const()[name = tensor<string, []>("op_1235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1235_cast_fp16 = einsum(equation = var_1235_equation_0, values = (var_1143_cast_fp16_13, var_1205_cast_fp16))[name = tensor<string, []>("op_1235_cast_fp16")];
+            tensor<string, []> var_1237_equation_0 = const()[name = tensor<string, []>("op_1237_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1237_cast_fp16 = einsum(equation = var_1237_equation_0, values = (var_1143_cast_fp16_14, var_1206_cast_fp16))[name = tensor<string, []>("op_1237_cast_fp16")];
+            tensor<string, []> var_1239_equation_0 = const()[name = tensor<string, []>("op_1239_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1239_cast_fp16 = einsum(equation = var_1239_equation_0, values = (var_1143_cast_fp16_15, var_1207_cast_fp16))[name = tensor<string, []>("op_1239_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_45_cast_fp16 = concat(axis = var_1056, interleave = input_45_interleave_0, values = (var_1209_cast_fp16, var_1211_cast_fp16, var_1213_cast_fp16, var_1215_cast_fp16, var_1217_cast_fp16, var_1219_cast_fp16, var_1221_cast_fp16, var_1223_cast_fp16, var_1225_cast_fp16, var_1227_cast_fp16, var_1229_cast_fp16, var_1231_cast_fp16, var_1233_cast_fp16, var_1235_cast_fp16, var_1237_cast_fp16, var_1239_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1248_pad_type_0 = const()[name = tensor<string, []>("op_1248_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1248_strides_0 = const()[name = tensor<string, []>("op_1248_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1248_pad_0 = const()[name = tensor<string, []>("op_1248_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1248_dilations_0 = const()[name = tensor<string, []>("op_1248_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1248_groups_0 = const()[name = tensor<string, []>("op_1248_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(116924992)))];
+            tensor<fp16, [1024]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119022208)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1248_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1248_dilations_0, groups = var_1248_groups_0, pad = var_1248_pad_0, pad_type = var_1248_pad_type_0, strides = var_1248_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1248_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1248_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119024320)))];
+            tensor<fp16, [1024]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119026432)))];
+            tensor<fp16, []> var_1258_to_fp16 = const()[name = tensor<string, []>("op_1258_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1258_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119028544)))];
+            tensor<fp16, [4096]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127417216)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1284_pad_type_0 = const()[name = tensor<string, []>("op_1284_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1284_strides_0 = const()[name = tensor<string, []>("op_1284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1284_pad_0 = const()[name = tensor<string, []>("op_1284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1284_dilations_0 = const()[name = tensor<string, []>("op_1284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1284_groups_0 = const()[name = tensor<string, []>("op_1284_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(127425472)))];
+            tensor<fp16, [1024]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135814144)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1284_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1284_dilations_0, groups = var_1284_groups_0, pad = var_1284_pad_0, pad_type = var_1284_pad_type_0, strides = var_1284_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1284_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1284_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1293 = const()[name = tensor<string, []>("op_1293"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135816256)))];
+            tensor<fp16, [1024]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135818368)))];
+            tensor<fp16, []> var_1309_to_fp16 = const()[name = tensor<string, []>("op_1309_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1309_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1344_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1344_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(135820480)))];
+            tensor<fp16, [1024]> var_1344_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1344_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137917696)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1344_cast_fp16 = conv(bias = var_1344_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1344_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1344_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137919808)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1342_pad_type_0 = const()[name = tensor<string, []>("op_1342_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1342_strides_0 = const()[name = tensor<string, []>("op_1342_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1342_pad_0 = const()[name = tensor<string, []>("op_1342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1342_dilations_0 = const()[name = tensor<string, []>("op_1342_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1342_groups_0 = const()[name = tensor<string, []>("op_1342_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(140017024)))];
+            tensor<fp16, [1024]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142114240)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1342_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1342_dilations_0, groups = var_1342_groups_0, pad = var_1342_pad_0, pad_type = var_1342_pad_type_0, strides = var_1342_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
+            tensor<int32, [16]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1345_axis_0 = const()[name = tensor<string, []>("op_1345_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1345_cast_fp16_15 = split(axis = var_1345_axis_0, split_sizes = tile_15, x = var_1344_cast_fp16)[name = tensor<string, []>("op_1345_cast_fp16")];
+            tensor<int32, [4]> var_1362_perm_0 = const()[name = tensor<string, []>("op_1362_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1363_axis_0 = const()[name = tensor<string, []>("op_1363_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1362_cast_fp16 = transpose(perm = var_1362_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_19")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1363_cast_fp16_15 = split(axis = var_1363_axis_0, split_sizes = tile_16, x = var_1362_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [16]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1380_axis_0 = const()[name = tensor<string, []>("op_1380_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1380_cast_fp16_15 = split(axis = var_1380_axis_0, split_sizes = tile_17, x = var_1342_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1363_cast_fp16_0, var_1345_cast_fp16_0))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1363_cast_fp16_1, var_1345_cast_fp16_1))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1363_cast_fp16_2, var_1345_cast_fp16_2))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1363_cast_fp16_3, var_1345_cast_fp16_3))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1363_cast_fp16_4, var_1345_cast_fp16_4))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1363_cast_fp16_5, var_1345_cast_fp16_5))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1363_cast_fp16_6, var_1345_cast_fp16_6))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1363_cast_fp16_7, var_1345_cast_fp16_7))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1363_cast_fp16_8, var_1345_cast_fp16_8))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1363_cast_fp16_9, var_1345_cast_fp16_9))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1363_cast_fp16_10, var_1345_cast_fp16_10))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1363_cast_fp16_11, var_1345_cast_fp16_11))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1363_cast_fp16_12, var_1345_cast_fp16_12))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1363_cast_fp16_13, var_1345_cast_fp16_13))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1363_cast_fp16_14, var_1345_cast_fp16_14))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1363_cast_fp16_15, var_1345_cast_fp16_15))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1429_cast_fp16 = softmax(axis = var_1293, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1429_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1430_cast_fp16 = softmax(axis = var_1293, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1430_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1431_cast_fp16 = softmax(axis = var_1293, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1431_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1432_cast_fp16 = softmax(axis = var_1293, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1433_cast_fp16 = softmax(axis = var_1293, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1434_cast_fp16 = softmax(axis = var_1293, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1435_cast_fp16 = softmax(axis = var_1293, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1436_cast_fp16 = softmax(axis = var_1293, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1437_cast_fp16 = softmax(axis = var_1293, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1438_cast_fp16 = softmax(axis = var_1293, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1439_cast_fp16 = softmax(axis = var_1293, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1440_cast_fp16 = softmax(axis = var_1293, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1441_cast_fp16 = softmax(axis = var_1293, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1441_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1442_cast_fp16 = softmax(axis = var_1293, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1442_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1443_cast_fp16 = softmax(axis = var_1293, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1443_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1444_cast_fp16 = softmax(axis = var_1293, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1444_cast_fp16")];
+            tensor<string, []> var_1446_equation_0 = const()[name = tensor<string, []>("op_1446_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1446_cast_fp16 = einsum(equation = var_1446_equation_0, values = (var_1380_cast_fp16_0, var_1429_cast_fp16))[name = tensor<string, []>("op_1446_cast_fp16")];
+            tensor<string, []> var_1448_equation_0 = const()[name = tensor<string, []>("op_1448_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1448_cast_fp16 = einsum(equation = var_1448_equation_0, values = (var_1380_cast_fp16_1, var_1430_cast_fp16))[name = tensor<string, []>("op_1448_cast_fp16")];
+            tensor<string, []> var_1450_equation_0 = const()[name = tensor<string, []>("op_1450_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1450_cast_fp16 = einsum(equation = var_1450_equation_0, values = (var_1380_cast_fp16_2, var_1431_cast_fp16))[name = tensor<string, []>("op_1450_cast_fp16")];
+            tensor<string, []> var_1452_equation_0 = const()[name = tensor<string, []>("op_1452_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1452_cast_fp16 = einsum(equation = var_1452_equation_0, values = (var_1380_cast_fp16_3, var_1432_cast_fp16))[name = tensor<string, []>("op_1452_cast_fp16")];
+            tensor<string, []> var_1454_equation_0 = const()[name = tensor<string, []>("op_1454_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1454_cast_fp16 = einsum(equation = var_1454_equation_0, values = (var_1380_cast_fp16_4, var_1433_cast_fp16))[name = tensor<string, []>("op_1454_cast_fp16")];
+            tensor<string, []> var_1456_equation_0 = const()[name = tensor<string, []>("op_1456_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1456_cast_fp16 = einsum(equation = var_1456_equation_0, values = (var_1380_cast_fp16_5, var_1434_cast_fp16))[name = tensor<string, []>("op_1456_cast_fp16")];
+            tensor<string, []> var_1458_equation_0 = const()[name = tensor<string, []>("op_1458_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1458_cast_fp16 = einsum(equation = var_1458_equation_0, values = (var_1380_cast_fp16_6, var_1435_cast_fp16))[name = tensor<string, []>("op_1458_cast_fp16")];
+            tensor<string, []> var_1460_equation_0 = const()[name = tensor<string, []>("op_1460_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1460_cast_fp16 = einsum(equation = var_1460_equation_0, values = (var_1380_cast_fp16_7, var_1436_cast_fp16))[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<string, []> var_1462_equation_0 = const()[name = tensor<string, []>("op_1462_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1462_cast_fp16 = einsum(equation = var_1462_equation_0, values = (var_1380_cast_fp16_8, var_1437_cast_fp16))[name = tensor<string, []>("op_1462_cast_fp16")];
+            tensor<string, []> var_1464_equation_0 = const()[name = tensor<string, []>("op_1464_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1464_cast_fp16 = einsum(equation = var_1464_equation_0, values = (var_1380_cast_fp16_9, var_1438_cast_fp16))[name = tensor<string, []>("op_1464_cast_fp16")];
+            tensor<string, []> var_1466_equation_0 = const()[name = tensor<string, []>("op_1466_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1466_cast_fp16 = einsum(equation = var_1466_equation_0, values = (var_1380_cast_fp16_10, var_1439_cast_fp16))[name = tensor<string, []>("op_1466_cast_fp16")];
+            tensor<string, []> var_1468_equation_0 = const()[name = tensor<string, []>("op_1468_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1468_cast_fp16 = einsum(equation = var_1468_equation_0, values = (var_1380_cast_fp16_11, var_1440_cast_fp16))[name = tensor<string, []>("op_1468_cast_fp16")];
+            tensor<string, []> var_1470_equation_0 = const()[name = tensor<string, []>("op_1470_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1470_cast_fp16 = einsum(equation = var_1470_equation_0, values = (var_1380_cast_fp16_12, var_1441_cast_fp16))[name = tensor<string, []>("op_1470_cast_fp16")];
+            tensor<string, []> var_1472_equation_0 = const()[name = tensor<string, []>("op_1472_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1472_cast_fp16 = einsum(equation = var_1472_equation_0, values = (var_1380_cast_fp16_13, var_1442_cast_fp16))[name = tensor<string, []>("op_1472_cast_fp16")];
+            tensor<string, []> var_1474_equation_0 = const()[name = tensor<string, []>("op_1474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1380_cast_fp16_14, var_1443_cast_fp16))[name = tensor<string, []>("op_1474_cast_fp16")];
+            tensor<string, []> var_1476_equation_0 = const()[name = tensor<string, []>("op_1476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1380_cast_fp16_15, var_1444_cast_fp16))[name = tensor<string, []>("op_1476_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1293, interleave = input_55_interleave_0, values = (var_1446_cast_fp16, var_1448_cast_fp16, var_1450_cast_fp16, var_1452_cast_fp16, var_1454_cast_fp16, var_1456_cast_fp16, var_1458_cast_fp16, var_1460_cast_fp16, var_1462_cast_fp16, var_1464_cast_fp16, var_1466_cast_fp16, var_1468_cast_fp16, var_1470_cast_fp16, var_1472_cast_fp16, var_1474_cast_fp16, var_1476_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1485_pad_type_0 = const()[name = tensor<string, []>("op_1485_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1485_strides_0 = const()[name = tensor<string, []>("op_1485_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1485_pad_0 = const()[name = tensor<string, []>("op_1485_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1485_dilations_0 = const()[name = tensor<string, []>("op_1485_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1485_groups_0 = const()[name = tensor<string, []>("op_1485_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(142116352)))];
+            tensor<fp16, [1024]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144213568)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1485_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1485_dilations_0, groups = var_1485_groups_0, pad = var_1485_pad_0, pad_type = var_1485_pad_type_0, strides = var_1485_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1485_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1485_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144215680)))];
+            tensor<fp16, [1024]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144217792)))];
+            tensor<fp16, []> var_1495_to_fp16 = const()[name = tensor<string, []>("op_1495_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1495_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(144219904)))];
+            tensor<fp16, [4096]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152608576)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1521_pad_type_0 = const()[name = tensor<string, []>("op_1521_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1521_strides_0 = const()[name = tensor<string, []>("op_1521_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1521_pad_0 = const()[name = tensor<string, []>("op_1521_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1521_dilations_0 = const()[name = tensor<string, []>("op_1521_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1521_groups_0 = const()[name = tensor<string, []>("op_1521_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152616832)))];
+            tensor<fp16, [1024]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161005504)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1521_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1521_dilations_0, groups = var_1521_groups_0, pad = var_1521_pad_0, pad_type = var_1521_pad_type_0, strides = var_1521_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1521_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1521_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1530 = const()[name = tensor<string, []>("op_1530"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161007616)))];
+            tensor<fp16, [1024]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161009728)))];
+            tensor<fp16, []> var_1546_to_fp16 = const()[name = tensor<string, []>("op_1546_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1546_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1581_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1581_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(161011840)))];
+            tensor<fp16, [1024]> var_1581_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1581_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163109056)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1581_cast_fp16 = conv(bias = var_1581_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1581_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1581_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163111168)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1579_pad_type_0 = const()[name = tensor<string, []>("op_1579_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1579_strides_0 = const()[name = tensor<string, []>("op_1579_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1579_pad_0 = const()[name = tensor<string, []>("op_1579_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1579_dilations_0 = const()[name = tensor<string, []>("op_1579_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1579_groups_0 = const()[name = tensor<string, []>("op_1579_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165208384)))];
+            tensor<fp16, [1024]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167305600)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1579_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1579_dilations_0, groups = var_1579_groups_0, pad = var_1579_pad_0, pad_type = var_1579_pad_type_0, strides = var_1579_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1579_cast_fp16")];
+            tensor<int32, [16]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1582_axis_0 = const()[name = tensor<string, []>("op_1582_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1582_cast_fp16_15 = split(axis = var_1582_axis_0, split_sizes = tile_18, x = var_1581_cast_fp16)[name = tensor<string, []>("op_1582_cast_fp16")];
+            tensor<int32, [4]> var_1599_perm_0 = const()[name = tensor<string, []>("op_1599_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1600_axis_0 = const()[name = tensor<string, []>("op_1600_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1599_cast_fp16 = transpose(perm = var_1599_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_18")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1600_cast_fp16_15 = split(axis = var_1600_axis_0, split_sizes = tile_19, x = var_1599_cast_fp16)[name = tensor<string, []>("op_1600_cast_fp16")];
+            tensor<int32, [16]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1617_axis_0 = const()[name = tensor<string, []>("op_1617_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1617_cast_fp16_15 = split(axis = var_1617_axis_0, split_sizes = tile_20, x = var_1579_cast_fp16)[name = tensor<string, []>("op_1617_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1600_cast_fp16_0, var_1582_cast_fp16_0))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1600_cast_fp16_1, var_1582_cast_fp16_1))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1600_cast_fp16_2, var_1582_cast_fp16_2))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1600_cast_fp16_3, var_1582_cast_fp16_3))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1600_cast_fp16_4, var_1582_cast_fp16_4))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1600_cast_fp16_5, var_1582_cast_fp16_5))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1600_cast_fp16_6, var_1582_cast_fp16_6))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1600_cast_fp16_7, var_1582_cast_fp16_7))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1600_cast_fp16_8, var_1582_cast_fp16_8))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1600_cast_fp16_9, var_1582_cast_fp16_9))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1600_cast_fp16_10, var_1582_cast_fp16_10))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1600_cast_fp16_11, var_1582_cast_fp16_11))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1600_cast_fp16_12, var_1582_cast_fp16_12))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1600_cast_fp16_13, var_1582_cast_fp16_13))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1600_cast_fp16_14, var_1582_cast_fp16_14))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1600_cast_fp16_15, var_1582_cast_fp16_15))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1666_cast_fp16 = softmax(axis = var_1530, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1667_cast_fp16 = softmax(axis = var_1530, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1667_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1668_cast_fp16 = softmax(axis = var_1530, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1669_cast_fp16 = softmax(axis = var_1530, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1669_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1670_cast_fp16 = softmax(axis = var_1530, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1671_cast_fp16 = softmax(axis = var_1530, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1672_cast_fp16 = softmax(axis = var_1530, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1672_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1673_cast_fp16 = softmax(axis = var_1530, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1673_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1674_cast_fp16 = softmax(axis = var_1530, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1674_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1675_cast_fp16 = softmax(axis = var_1530, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1675_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1676_cast_fp16 = softmax(axis = var_1530, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1676_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1677_cast_fp16 = softmax(axis = var_1530, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1677_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1678_cast_fp16 = softmax(axis = var_1530, x = aw_217_cast_fp16)[name = tensor<string, []>("op_1678_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1679_cast_fp16 = softmax(axis = var_1530, x = aw_219_cast_fp16)[name = tensor<string, []>("op_1679_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1680_cast_fp16 = softmax(axis = var_1530, x = aw_221_cast_fp16)[name = tensor<string, []>("op_1680_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1681_cast_fp16 = softmax(axis = var_1530, x = aw_223_cast_fp16)[name = tensor<string, []>("op_1681_cast_fp16")];
+            tensor<string, []> var_1683_equation_0 = const()[name = tensor<string, []>("op_1683_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1683_cast_fp16 = einsum(equation = var_1683_equation_0, values = (var_1617_cast_fp16_0, var_1666_cast_fp16))[name = tensor<string, []>("op_1683_cast_fp16")];
+            tensor<string, []> var_1685_equation_0 = const()[name = tensor<string, []>("op_1685_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1685_cast_fp16 = einsum(equation = var_1685_equation_0, values = (var_1617_cast_fp16_1, var_1667_cast_fp16))[name = tensor<string, []>("op_1685_cast_fp16")];
+            tensor<string, []> var_1687_equation_0 = const()[name = tensor<string, []>("op_1687_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1687_cast_fp16 = einsum(equation = var_1687_equation_0, values = (var_1617_cast_fp16_2, var_1668_cast_fp16))[name = tensor<string, []>("op_1687_cast_fp16")];
+            tensor<string, []> var_1689_equation_0 = const()[name = tensor<string, []>("op_1689_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1689_cast_fp16 = einsum(equation = var_1689_equation_0, values = (var_1617_cast_fp16_3, var_1669_cast_fp16))[name = tensor<string, []>("op_1689_cast_fp16")];
+            tensor<string, []> var_1691_equation_0 = const()[name = tensor<string, []>("op_1691_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1691_cast_fp16 = einsum(equation = var_1691_equation_0, values = (var_1617_cast_fp16_4, var_1670_cast_fp16))[name = tensor<string, []>("op_1691_cast_fp16")];
+            tensor<string, []> var_1693_equation_0 = const()[name = tensor<string, []>("op_1693_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1693_cast_fp16 = einsum(equation = var_1693_equation_0, values = (var_1617_cast_fp16_5, var_1671_cast_fp16))[name = tensor<string, []>("op_1693_cast_fp16")];
+            tensor<string, []> var_1695_equation_0 = const()[name = tensor<string, []>("op_1695_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1695_cast_fp16 = einsum(equation = var_1695_equation_0, values = (var_1617_cast_fp16_6, var_1672_cast_fp16))[name = tensor<string, []>("op_1695_cast_fp16")];
+            tensor<string, []> var_1697_equation_0 = const()[name = tensor<string, []>("op_1697_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1697_cast_fp16 = einsum(equation = var_1697_equation_0, values = (var_1617_cast_fp16_7, var_1673_cast_fp16))[name = tensor<string, []>("op_1697_cast_fp16")];
+            tensor<string, []> var_1699_equation_0 = const()[name = tensor<string, []>("op_1699_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1699_cast_fp16 = einsum(equation = var_1699_equation_0, values = (var_1617_cast_fp16_8, var_1674_cast_fp16))[name = tensor<string, []>("op_1699_cast_fp16")];
+            tensor<string, []> var_1701_equation_0 = const()[name = tensor<string, []>("op_1701_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1701_cast_fp16 = einsum(equation = var_1701_equation_0, values = (var_1617_cast_fp16_9, var_1675_cast_fp16))[name = tensor<string, []>("op_1701_cast_fp16")];
+            tensor<string, []> var_1703_equation_0 = const()[name = tensor<string, []>("op_1703_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1703_cast_fp16 = einsum(equation = var_1703_equation_0, values = (var_1617_cast_fp16_10, var_1676_cast_fp16))[name = tensor<string, []>("op_1703_cast_fp16")];
+            tensor<string, []> var_1705_equation_0 = const()[name = tensor<string, []>("op_1705_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1705_cast_fp16 = einsum(equation = var_1705_equation_0, values = (var_1617_cast_fp16_11, var_1677_cast_fp16))[name = tensor<string, []>("op_1705_cast_fp16")];
+            tensor<string, []> var_1707_equation_0 = const()[name = tensor<string, []>("op_1707_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1707_cast_fp16 = einsum(equation = var_1707_equation_0, values = (var_1617_cast_fp16_12, var_1678_cast_fp16))[name = tensor<string, []>("op_1707_cast_fp16")];
+            tensor<string, []> var_1709_equation_0 = const()[name = tensor<string, []>("op_1709_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1709_cast_fp16 = einsum(equation = var_1709_equation_0, values = (var_1617_cast_fp16_13, var_1679_cast_fp16))[name = tensor<string, []>("op_1709_cast_fp16")];
+            tensor<string, []> var_1711_equation_0 = const()[name = tensor<string, []>("op_1711_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1711_cast_fp16 = einsum(equation = var_1711_equation_0, values = (var_1617_cast_fp16_14, var_1680_cast_fp16))[name = tensor<string, []>("op_1711_cast_fp16")];
+            tensor<string, []> var_1713_equation_0 = const()[name = tensor<string, []>("op_1713_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1713_cast_fp16 = einsum(equation = var_1713_equation_0, values = (var_1617_cast_fp16_15, var_1681_cast_fp16))[name = tensor<string, []>("op_1713_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1530, interleave = input_65_interleave_0, values = (var_1683_cast_fp16, var_1685_cast_fp16, var_1687_cast_fp16, var_1689_cast_fp16, var_1691_cast_fp16, var_1693_cast_fp16, var_1695_cast_fp16, var_1697_cast_fp16, var_1699_cast_fp16, var_1701_cast_fp16, var_1703_cast_fp16, var_1705_cast_fp16, var_1707_cast_fp16, var_1709_cast_fp16, var_1711_cast_fp16, var_1713_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1722_pad_type_0 = const()[name = tensor<string, []>("op_1722_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1722_strides_0 = const()[name = tensor<string, []>("op_1722_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1722_pad_0 = const()[name = tensor<string, []>("op_1722_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1722_dilations_0 = const()[name = tensor<string, []>("op_1722_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1722_groups_0 = const()[name = tensor<string, []>("op_1722_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(167307712)))];
+            tensor<fp16, [1024]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169404928)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1722_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1722_dilations_0, groups = var_1722_groups_0, pad = var_1722_pad_0, pad_type = var_1722_pad_type_0, strides = var_1722_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1722_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1722_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169407040)))];
+            tensor<fp16, [1024]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169409152)))];
+            tensor<fp16, []> var_1732_to_fp16 = const()[name = tensor<string, []>("op_1732_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1732_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(169411264)))];
+            tensor<fp16, [4096]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177799936)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1758_pad_type_0 = const()[name = tensor<string, []>("op_1758_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1758_strides_0 = const()[name = tensor<string, []>("op_1758_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1758_pad_0 = const()[name = tensor<string, []>("op_1758_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1758_dilations_0 = const()[name = tensor<string, []>("op_1758_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1758_groups_0 = const()[name = tensor<string, []>("op_1758_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(177808192)))];
+            tensor<fp16, [1024]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186196864)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1758_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1758_dilations_0, groups = var_1758_groups_0, pad = var_1758_pad_0, pad_type = var_1758_pad_type_0, strides = var_1758_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1758_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1758_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_1767 = const()[name = tensor<string, []>("op_1767"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186198976)))];
+            tensor<fp16, [1024]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186201088)))];
+            tensor<fp16, []> var_1783_to_fp16 = const()[name = tensor<string, []>("op_1783_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_1783_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_1818_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1818_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186203200)))];
+            tensor<fp16, [1024]> var_1818_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1818_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188300416)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1818_cast_fp16 = conv(bias = var_1818_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_1818_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1818_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188302528)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_1816_pad_type_0 = const()[name = tensor<string, []>("op_1816_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1816_strides_0 = const()[name = tensor<string, []>("op_1816_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1816_pad_0 = const()[name = tensor<string, []>("op_1816_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1816_dilations_0 = const()[name = tensor<string, []>("op_1816_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1816_groups_0 = const()[name = tensor<string, []>("op_1816_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(190399744)))];
+            tensor<fp16, [1024]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192496960)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1816_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_1816_dilations_0, groups = var_1816_groups_0, pad = var_1816_pad_0, pad_type = var_1816_pad_type_0, strides = var_1816_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1816_cast_fp16")];
+            tensor<int32, [16]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1819_axis_0 = const()[name = tensor<string, []>("op_1819_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1819_cast_fp16_15 = split(axis = var_1819_axis_0, split_sizes = tile_21, x = var_1818_cast_fp16)[name = tensor<string, []>("op_1819_cast_fp16")];
+            tensor<int32, [4]> var_1836_perm_0 = const()[name = tensor<string, []>("op_1836_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1837_axis_0 = const()[name = tensor<string, []>("op_1837_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_1836_cast_fp16 = transpose(perm = var_1836_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_17")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_1837_cast_fp16_15 = split(axis = var_1837_axis_0, split_sizes = tile_22, x = var_1836_cast_fp16)[name = tensor<string, []>("op_1837_cast_fp16")];
+            tensor<int32, [16]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1854_axis_0 = const()[name = tensor<string, []>("op_1854_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_1854_cast_fp16_15 = split(axis = var_1854_axis_0, split_sizes = tile_23, x = var_1816_cast_fp16)[name = tensor<string, []>("op_1854_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1837_cast_fp16_0, var_1819_cast_fp16_0))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1837_cast_fp16_1, var_1819_cast_fp16_1))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1837_cast_fp16_2, var_1819_cast_fp16_2))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1837_cast_fp16_3, var_1819_cast_fp16_3))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1837_cast_fp16_4, var_1819_cast_fp16_4))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1837_cast_fp16_5, var_1819_cast_fp16_5))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1837_cast_fp16_6, var_1819_cast_fp16_6))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1837_cast_fp16_7, var_1819_cast_fp16_7))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_1837_cast_fp16_8, var_1819_cast_fp16_8))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_1837_cast_fp16_9, var_1819_cast_fp16_9))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_1837_cast_fp16_10, var_1819_cast_fp16_10))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_1837_cast_fp16_11, var_1819_cast_fp16_11))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_1837_cast_fp16_12, var_1819_cast_fp16_12))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_1837_cast_fp16_13, var_1819_cast_fp16_13))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_1837_cast_fp16_14, var_1819_cast_fp16_14))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_1837_cast_fp16_15, var_1819_cast_fp16_15))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1903_cast_fp16 = softmax(axis = var_1767, x = aw_225_cast_fp16)[name = tensor<string, []>("op_1903_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1904_cast_fp16 = softmax(axis = var_1767, x = aw_227_cast_fp16)[name = tensor<string, []>("op_1904_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1905_cast_fp16 = softmax(axis = var_1767, x = aw_229_cast_fp16)[name = tensor<string, []>("op_1905_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1906_cast_fp16 = softmax(axis = var_1767, x = aw_231_cast_fp16)[name = tensor<string, []>("op_1906_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1907_cast_fp16 = softmax(axis = var_1767, x = aw_233_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1908_cast_fp16 = softmax(axis = var_1767, x = aw_235_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1909_cast_fp16 = softmax(axis = var_1767, x = aw_237_cast_fp16)[name = tensor<string, []>("op_1909_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1910_cast_fp16 = softmax(axis = var_1767, x = aw_239_cast_fp16)[name = tensor<string, []>("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1911_cast_fp16 = softmax(axis = var_1767, x = aw_241_cast_fp16)[name = tensor<string, []>("op_1911_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1912_cast_fp16 = softmax(axis = var_1767, x = aw_243_cast_fp16)[name = tensor<string, []>("op_1912_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1913_cast_fp16 = softmax(axis = var_1767, x = aw_245_cast_fp16)[name = tensor<string, []>("op_1913_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1914_cast_fp16 = softmax(axis = var_1767, x = aw_247_cast_fp16)[name = tensor<string, []>("op_1914_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1915_cast_fp16 = softmax(axis = var_1767, x = aw_249_cast_fp16)[name = tensor<string, []>("op_1915_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1916_cast_fp16 = softmax(axis = var_1767, x = aw_251_cast_fp16)[name = tensor<string, []>("op_1916_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1917_cast_fp16 = softmax(axis = var_1767, x = aw_253_cast_fp16)[name = tensor<string, []>("op_1917_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1918_cast_fp16 = softmax(axis = var_1767, x = aw_255_cast_fp16)[name = tensor<string, []>("op_1918_cast_fp16")];
+            tensor<string, []> var_1920_equation_0 = const()[name = tensor<string, []>("op_1920_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1920_cast_fp16 = einsum(equation = var_1920_equation_0, values = (var_1854_cast_fp16_0, var_1903_cast_fp16))[name = tensor<string, []>("op_1920_cast_fp16")];
+            tensor<string, []> var_1922_equation_0 = const()[name = tensor<string, []>("op_1922_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1922_cast_fp16 = einsum(equation = var_1922_equation_0, values = (var_1854_cast_fp16_1, var_1904_cast_fp16))[name = tensor<string, []>("op_1922_cast_fp16")];
+            tensor<string, []> var_1924_equation_0 = const()[name = tensor<string, []>("op_1924_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1924_cast_fp16 = einsum(equation = var_1924_equation_0, values = (var_1854_cast_fp16_2, var_1905_cast_fp16))[name = tensor<string, []>("op_1924_cast_fp16")];
+            tensor<string, []> var_1926_equation_0 = const()[name = tensor<string, []>("op_1926_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1926_cast_fp16 = einsum(equation = var_1926_equation_0, values = (var_1854_cast_fp16_3, var_1906_cast_fp16))[name = tensor<string, []>("op_1926_cast_fp16")];
+            tensor<string, []> var_1928_equation_0 = const()[name = tensor<string, []>("op_1928_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1928_cast_fp16 = einsum(equation = var_1928_equation_0, values = (var_1854_cast_fp16_4, var_1907_cast_fp16))[name = tensor<string, []>("op_1928_cast_fp16")];
+            tensor<string, []> var_1930_equation_0 = const()[name = tensor<string, []>("op_1930_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1930_cast_fp16 = einsum(equation = var_1930_equation_0, values = (var_1854_cast_fp16_5, var_1908_cast_fp16))[name = tensor<string, []>("op_1930_cast_fp16")];
+            tensor<string, []> var_1932_equation_0 = const()[name = tensor<string, []>("op_1932_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1932_cast_fp16 = einsum(equation = var_1932_equation_0, values = (var_1854_cast_fp16_6, var_1909_cast_fp16))[name = tensor<string, []>("op_1932_cast_fp16")];
+            tensor<string, []> var_1934_equation_0 = const()[name = tensor<string, []>("op_1934_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1934_cast_fp16 = einsum(equation = var_1934_equation_0, values = (var_1854_cast_fp16_7, var_1910_cast_fp16))[name = tensor<string, []>("op_1934_cast_fp16")];
+            tensor<string, []> var_1936_equation_0 = const()[name = tensor<string, []>("op_1936_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1936_cast_fp16 = einsum(equation = var_1936_equation_0, values = (var_1854_cast_fp16_8, var_1911_cast_fp16))[name = tensor<string, []>("op_1936_cast_fp16")];
+            tensor<string, []> var_1938_equation_0 = const()[name = tensor<string, []>("op_1938_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1938_cast_fp16 = einsum(equation = var_1938_equation_0, values = (var_1854_cast_fp16_9, var_1912_cast_fp16))[name = tensor<string, []>("op_1938_cast_fp16")];
+            tensor<string, []> var_1940_equation_0 = const()[name = tensor<string, []>("op_1940_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1940_cast_fp16 = einsum(equation = var_1940_equation_0, values = (var_1854_cast_fp16_10, var_1913_cast_fp16))[name = tensor<string, []>("op_1940_cast_fp16")];
+            tensor<string, []> var_1942_equation_0 = const()[name = tensor<string, []>("op_1942_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1942_cast_fp16 = einsum(equation = var_1942_equation_0, values = (var_1854_cast_fp16_11, var_1914_cast_fp16))[name = tensor<string, []>("op_1942_cast_fp16")];
+            tensor<string, []> var_1944_equation_0 = const()[name = tensor<string, []>("op_1944_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1944_cast_fp16 = einsum(equation = var_1944_equation_0, values = (var_1854_cast_fp16_12, var_1915_cast_fp16))[name = tensor<string, []>("op_1944_cast_fp16")];
+            tensor<string, []> var_1946_equation_0 = const()[name = tensor<string, []>("op_1946_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1946_cast_fp16 = einsum(equation = var_1946_equation_0, values = (var_1854_cast_fp16_13, var_1916_cast_fp16))[name = tensor<string, []>("op_1946_cast_fp16")];
+            tensor<string, []> var_1948_equation_0 = const()[name = tensor<string, []>("op_1948_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1948_cast_fp16 = einsum(equation = var_1948_equation_0, values = (var_1854_cast_fp16_14, var_1917_cast_fp16))[name = tensor<string, []>("op_1948_cast_fp16")];
+            tensor<string, []> var_1950_equation_0 = const()[name = tensor<string, []>("op_1950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1950_cast_fp16 = einsum(equation = var_1950_equation_0, values = (var_1854_cast_fp16_15, var_1918_cast_fp16))[name = tensor<string, []>("op_1950_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_75_cast_fp16 = concat(axis = var_1767, interleave = input_75_interleave_0, values = (var_1920_cast_fp16, var_1922_cast_fp16, var_1924_cast_fp16, var_1926_cast_fp16, var_1928_cast_fp16, var_1930_cast_fp16, var_1932_cast_fp16, var_1934_cast_fp16, var_1936_cast_fp16, var_1938_cast_fp16, var_1940_cast_fp16, var_1942_cast_fp16, var_1944_cast_fp16, var_1946_cast_fp16, var_1948_cast_fp16, var_1950_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_1959_pad_type_0 = const()[name = tensor<string, []>("op_1959_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1959_strides_0 = const()[name = tensor<string, []>("op_1959_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1959_pad_0 = const()[name = tensor<string, []>("op_1959_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1959_dilations_0 = const()[name = tensor<string, []>("op_1959_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1959_groups_0 = const()[name = tensor<string, []>("op_1959_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192499072)))];
+            tensor<fp16, [1024]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194596288)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1959_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_1959_dilations_0, groups = var_1959_groups_0, pad = var_1959_pad_0, pad_type = var_1959_pad_type_0, strides = var_1959_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_1959_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_1959_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194598400)))];
+            tensor<fp16, [1024]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194600512)))];
+            tensor<fp16, []> var_1969_to_fp16 = const()[name = tensor<string, []>("op_1969_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_1969_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(194602624)))];
+            tensor<fp16, [4096]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202991296)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_1995_pad_type_0 = const()[name = tensor<string, []>("op_1995_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1995_strides_0 = const()[name = tensor<string, []>("op_1995_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1995_pad_0 = const()[name = tensor<string, []>("op_1995_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1995_dilations_0 = const()[name = tensor<string, []>("op_1995_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1995_groups_0 = const()[name = tensor<string, []>("op_1995_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202999552)))];
+            tensor<fp16, [1024]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211388224)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_1995_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_1995_dilations_0, groups = var_1995_groups_0, pad = var_1995_pad_0, pad_type = var_1995_pad_type_0, strides = var_1995_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_1995_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_2004 = const()[name = tensor<string, []>("op_2004"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211390336)))];
+            tensor<fp16, [1024]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211392448)))];
+            tensor<fp16, []> var_2020_to_fp16 = const()[name = tensor<string, []>("op_2020_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_2020_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2055_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2055_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(211394560)))];
+            tensor<fp16, [1024]> var_2055_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2055_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213491776)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2055_cast_fp16 = conv(bias = var_2055_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_2055_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2055_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213493888)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_2053_pad_type_0 = const()[name = tensor<string, []>("op_2053_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2053_strides_0 = const()[name = tensor<string, []>("op_2053_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2053_pad_0 = const()[name = tensor<string, []>("op_2053_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2053_dilations_0 = const()[name = tensor<string, []>("op_2053_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2053_groups_0 = const()[name = tensor<string, []>("op_2053_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215591104)))];
+            tensor<fp16, [1024]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217688320)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2053_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_2053_dilations_0, groups = var_2053_groups_0, pad = var_2053_pad_0, pad_type = var_2053_pad_type_0, strides = var_2053_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_2053_cast_fp16")];
+            tensor<int32, [16]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2056_axis_0 = const()[name = tensor<string, []>("op_2056_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2056_cast_fp16_15 = split(axis = var_2056_axis_0, split_sizes = tile_24, x = var_2055_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<int32, [4]> var_2073_perm_0 = const()[name = tensor<string, []>("op_2073_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2074_axis_0 = const()[name = tensor<string, []>("op_2074_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2073_cast_fp16 = transpose(perm = var_2073_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_16")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2074_cast_fp16_15 = split(axis = var_2074_axis_0, split_sizes = tile_25, x = var_2073_cast_fp16)[name = tensor<string, []>("op_2074_cast_fp16")];
+            tensor<int32, [16]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2091_axis_0 = const()[name = tensor<string, []>("op_2091_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2091_cast_fp16_15 = split(axis = var_2091_axis_0, split_sizes = tile_26, x = var_2053_cast_fp16)[name = tensor<string, []>("op_2091_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_2074_cast_fp16_0, var_2056_cast_fp16_0))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_2074_cast_fp16_1, var_2056_cast_fp16_1))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_2074_cast_fp16_2, var_2056_cast_fp16_2))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_2074_cast_fp16_3, var_2056_cast_fp16_3))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_2074_cast_fp16_4, var_2056_cast_fp16_4))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_2074_cast_fp16_5, var_2056_cast_fp16_5))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_2074_cast_fp16_6, var_2056_cast_fp16_6))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_2074_cast_fp16_7, var_2056_cast_fp16_7))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_2074_cast_fp16_8, var_2056_cast_fp16_8))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_2074_cast_fp16_9, var_2056_cast_fp16_9))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_2074_cast_fp16_10, var_2056_cast_fp16_10))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_2074_cast_fp16_11, var_2056_cast_fp16_11))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2074_cast_fp16_12, var_2056_cast_fp16_12))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2074_cast_fp16_13, var_2056_cast_fp16_13))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2074_cast_fp16_14, var_2056_cast_fp16_14))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_287_equation_0 = const()[name = tensor<string, []>("aw_287_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_287_cast_fp16 = einsum(equation = aw_287_equation_0, values = (var_2074_cast_fp16_15, var_2056_cast_fp16_15))[name = tensor<string, []>("aw_287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2140_cast_fp16 = softmax(axis = var_2004, x = aw_257_cast_fp16)[name = tensor<string, []>("op_2140_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2141_cast_fp16 = softmax(axis = var_2004, x = aw_259_cast_fp16)[name = tensor<string, []>("op_2141_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2142_cast_fp16 = softmax(axis = var_2004, x = aw_261_cast_fp16)[name = tensor<string, []>("op_2142_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2143_cast_fp16 = softmax(axis = var_2004, x = aw_263_cast_fp16)[name = tensor<string, []>("op_2143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2144_cast_fp16 = softmax(axis = var_2004, x = aw_265_cast_fp16)[name = tensor<string, []>("op_2144_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2145_cast_fp16 = softmax(axis = var_2004, x = aw_267_cast_fp16)[name = tensor<string, []>("op_2145_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2146_cast_fp16 = softmax(axis = var_2004, x = aw_269_cast_fp16)[name = tensor<string, []>("op_2146_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2147_cast_fp16 = softmax(axis = var_2004, x = aw_271_cast_fp16)[name = tensor<string, []>("op_2147_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2148_cast_fp16 = softmax(axis = var_2004, x = aw_273_cast_fp16)[name = tensor<string, []>("op_2148_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2149_cast_fp16 = softmax(axis = var_2004, x = aw_275_cast_fp16)[name = tensor<string, []>("op_2149_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2150_cast_fp16 = softmax(axis = var_2004, x = aw_277_cast_fp16)[name = tensor<string, []>("op_2150_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2151_cast_fp16 = softmax(axis = var_2004, x = aw_279_cast_fp16)[name = tensor<string, []>("op_2151_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2152_cast_fp16 = softmax(axis = var_2004, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2152_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2153_cast_fp16 = softmax(axis = var_2004, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2153_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2154_cast_fp16 = softmax(axis = var_2004, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2154_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2155_cast_fp16 = softmax(axis = var_2004, x = aw_287_cast_fp16)[name = tensor<string, []>("op_2155_cast_fp16")];
+            tensor<string, []> var_2157_equation_0 = const()[name = tensor<string, []>("op_2157_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2157_cast_fp16 = einsum(equation = var_2157_equation_0, values = (var_2091_cast_fp16_0, var_2140_cast_fp16))[name = tensor<string, []>("op_2157_cast_fp16")];
+            tensor<string, []> var_2159_equation_0 = const()[name = tensor<string, []>("op_2159_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2159_cast_fp16 = einsum(equation = var_2159_equation_0, values = (var_2091_cast_fp16_1, var_2141_cast_fp16))[name = tensor<string, []>("op_2159_cast_fp16")];
+            tensor<string, []> var_2161_equation_0 = const()[name = tensor<string, []>("op_2161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2161_cast_fp16 = einsum(equation = var_2161_equation_0, values = (var_2091_cast_fp16_2, var_2142_cast_fp16))[name = tensor<string, []>("op_2161_cast_fp16")];
+            tensor<string, []> var_2163_equation_0 = const()[name = tensor<string, []>("op_2163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2163_cast_fp16 = einsum(equation = var_2163_equation_0, values = (var_2091_cast_fp16_3, var_2143_cast_fp16))[name = tensor<string, []>("op_2163_cast_fp16")];
+            tensor<string, []> var_2165_equation_0 = const()[name = tensor<string, []>("op_2165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2165_cast_fp16 = einsum(equation = var_2165_equation_0, values = (var_2091_cast_fp16_4, var_2144_cast_fp16))[name = tensor<string, []>("op_2165_cast_fp16")];
+            tensor<string, []> var_2167_equation_0 = const()[name = tensor<string, []>("op_2167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2167_cast_fp16 = einsum(equation = var_2167_equation_0, values = (var_2091_cast_fp16_5, var_2145_cast_fp16))[name = tensor<string, []>("op_2167_cast_fp16")];
+            tensor<string, []> var_2169_equation_0 = const()[name = tensor<string, []>("op_2169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2169_cast_fp16 = einsum(equation = var_2169_equation_0, values = (var_2091_cast_fp16_6, var_2146_cast_fp16))[name = tensor<string, []>("op_2169_cast_fp16")];
+            tensor<string, []> var_2171_equation_0 = const()[name = tensor<string, []>("op_2171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2171_cast_fp16 = einsum(equation = var_2171_equation_0, values = (var_2091_cast_fp16_7, var_2147_cast_fp16))[name = tensor<string, []>("op_2171_cast_fp16")];
+            tensor<string, []> var_2173_equation_0 = const()[name = tensor<string, []>("op_2173_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2173_cast_fp16 = einsum(equation = var_2173_equation_0, values = (var_2091_cast_fp16_8, var_2148_cast_fp16))[name = tensor<string, []>("op_2173_cast_fp16")];
+            tensor<string, []> var_2175_equation_0 = const()[name = tensor<string, []>("op_2175_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2175_cast_fp16 = einsum(equation = var_2175_equation_0, values = (var_2091_cast_fp16_9, var_2149_cast_fp16))[name = tensor<string, []>("op_2175_cast_fp16")];
+            tensor<string, []> var_2177_equation_0 = const()[name = tensor<string, []>("op_2177_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2177_cast_fp16 = einsum(equation = var_2177_equation_0, values = (var_2091_cast_fp16_10, var_2150_cast_fp16))[name = tensor<string, []>("op_2177_cast_fp16")];
+            tensor<string, []> var_2179_equation_0 = const()[name = tensor<string, []>("op_2179_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2179_cast_fp16 = einsum(equation = var_2179_equation_0, values = (var_2091_cast_fp16_11, var_2151_cast_fp16))[name = tensor<string, []>("op_2179_cast_fp16")];
+            tensor<string, []> var_2181_equation_0 = const()[name = tensor<string, []>("op_2181_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2181_cast_fp16 = einsum(equation = var_2181_equation_0, values = (var_2091_cast_fp16_12, var_2152_cast_fp16))[name = tensor<string, []>("op_2181_cast_fp16")];
+            tensor<string, []> var_2183_equation_0 = const()[name = tensor<string, []>("op_2183_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2183_cast_fp16 = einsum(equation = var_2183_equation_0, values = (var_2091_cast_fp16_13, var_2153_cast_fp16))[name = tensor<string, []>("op_2183_cast_fp16")];
+            tensor<string, []> var_2185_equation_0 = const()[name = tensor<string, []>("op_2185_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2185_cast_fp16 = einsum(equation = var_2185_equation_0, values = (var_2091_cast_fp16_14, var_2154_cast_fp16))[name = tensor<string, []>("op_2185_cast_fp16")];
+            tensor<string, []> var_2187_equation_0 = const()[name = tensor<string, []>("op_2187_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2187_cast_fp16 = einsum(equation = var_2187_equation_0, values = (var_2091_cast_fp16_15, var_2155_cast_fp16))[name = tensor<string, []>("op_2187_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_85_cast_fp16 = concat(axis = var_2004, interleave = input_85_interleave_0, values = (var_2157_cast_fp16, var_2159_cast_fp16, var_2161_cast_fp16, var_2163_cast_fp16, var_2165_cast_fp16, var_2167_cast_fp16, var_2169_cast_fp16, var_2171_cast_fp16, var_2173_cast_fp16, var_2175_cast_fp16, var_2177_cast_fp16, var_2179_cast_fp16, var_2181_cast_fp16, var_2183_cast_fp16, var_2185_cast_fp16, var_2187_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_2196_pad_type_0 = const()[name = tensor<string, []>("op_2196_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2196_strides_0 = const()[name = tensor<string, []>("op_2196_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2196_pad_0 = const()[name = tensor<string, []>("op_2196_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2196_dilations_0 = const()[name = tensor<string, []>("op_2196_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2196_groups_0 = const()[name = tensor<string, []>("op_2196_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(217690432)))];
+            tensor<fp16, [1024]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219787648)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2196_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_2196_dilations_0, groups = var_2196_groups_0, pad = var_2196_pad_0, pad_type = var_2196_pad_type_0, strides = var_2196_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_2196_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219789760)))];
+            tensor<fp16, [1024]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219791872)))];
+            tensor<fp16, []> var_2206_to_fp16 = const()[name = tensor<string, []>("op_2206_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_2206_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(219793984)))];
+            tensor<fp16, [4096]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228182656)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_2232_pad_type_0 = const()[name = tensor<string, []>("op_2232_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2232_strides_0 = const()[name = tensor<string, []>("op_2232_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2232_pad_0 = const()[name = tensor<string, []>("op_2232_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2232_dilations_0 = const()[name = tensor<string, []>("op_2232_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2232_groups_0 = const()[name = tensor<string, []>("op_2232_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228190912)))];
+            tensor<fp16, [1024]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236579584)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2232_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_2232_dilations_0, groups = var_2232_groups_0, pad = var_2232_pad_0, pad_type = var_2232_pad_type_0, strides = var_2232_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_2232_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_2232_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_2241 = const()[name = tensor<string, []>("op_2241"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236581696)))];
+            tensor<fp16, [1024]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236583808)))];
+            tensor<fp16, []> var_2257_to_fp16 = const()[name = tensor<string, []>("op_2257_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_2257_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2292_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2292_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(236585920)))];
+            tensor<fp16, [1024]> var_2292_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2292_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238683136)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2292_cast_fp16 = conv(bias = var_2292_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_2292_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2292_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238685248)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_2290_pad_type_0 = const()[name = tensor<string, []>("op_2290_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2290_strides_0 = const()[name = tensor<string, []>("op_2290_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2290_pad_0 = const()[name = tensor<string, []>("op_2290_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2290_dilations_0 = const()[name = tensor<string, []>("op_2290_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2290_groups_0 = const()[name = tensor<string, []>("op_2290_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240782464)))];
+            tensor<fp16, [1024]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242879680)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2290_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_2290_dilations_0, groups = var_2290_groups_0, pad = var_2290_pad_0, pad_type = var_2290_pad_type_0, strides = var_2290_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_2290_cast_fp16")];
+            tensor<int32, [16]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2293_axis_0 = const()[name = tensor<string, []>("op_2293_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2293_cast_fp16_15 = split(axis = var_2293_axis_0, split_sizes = tile_27, x = var_2292_cast_fp16)[name = tensor<string, []>("op_2293_cast_fp16")];
+            tensor<int32, [4]> var_2310_perm_0 = const()[name = tensor<string, []>("op_2310_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2311_axis_0 = const()[name = tensor<string, []>("op_2311_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2310_cast_fp16 = transpose(perm = var_2310_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_15")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2311_cast_fp16_15 = split(axis = var_2311_axis_0, split_sizes = tile_28, x = var_2310_cast_fp16)[name = tensor<string, []>("op_2311_cast_fp16")];
+            tensor<int32, [16]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2328_axis_0 = const()[name = tensor<string, []>("op_2328_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2328_cast_fp16_15 = split(axis = var_2328_axis_0, split_sizes = tile_29, x = var_2290_cast_fp16)[name = tensor<string, []>("op_2328_cast_fp16")];
+            tensor<string, []> aw_289_equation_0 = const()[name = tensor<string, []>("aw_289_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_289_cast_fp16 = einsum(equation = aw_289_equation_0, values = (var_2311_cast_fp16_0, var_2293_cast_fp16_0))[name = tensor<string, []>("aw_289_cast_fp16")];
+            tensor<string, []> aw_291_equation_0 = const()[name = tensor<string, []>("aw_291_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_291_cast_fp16 = einsum(equation = aw_291_equation_0, values = (var_2311_cast_fp16_1, var_2293_cast_fp16_1))[name = tensor<string, []>("aw_291_cast_fp16")];
+            tensor<string, []> aw_293_equation_0 = const()[name = tensor<string, []>("aw_293_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_293_cast_fp16 = einsum(equation = aw_293_equation_0, values = (var_2311_cast_fp16_2, var_2293_cast_fp16_2))[name = tensor<string, []>("aw_293_cast_fp16")];
+            tensor<string, []> aw_295_equation_0 = const()[name = tensor<string, []>("aw_295_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_295_cast_fp16 = einsum(equation = aw_295_equation_0, values = (var_2311_cast_fp16_3, var_2293_cast_fp16_3))[name = tensor<string, []>("aw_295_cast_fp16")];
+            tensor<string, []> aw_297_equation_0 = const()[name = tensor<string, []>("aw_297_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_297_cast_fp16 = einsum(equation = aw_297_equation_0, values = (var_2311_cast_fp16_4, var_2293_cast_fp16_4))[name = tensor<string, []>("aw_297_cast_fp16")];
+            tensor<string, []> aw_299_equation_0 = const()[name = tensor<string, []>("aw_299_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_299_cast_fp16 = einsum(equation = aw_299_equation_0, values = (var_2311_cast_fp16_5, var_2293_cast_fp16_5))[name = tensor<string, []>("aw_299_cast_fp16")];
+            tensor<string, []> aw_301_equation_0 = const()[name = tensor<string, []>("aw_301_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_301_cast_fp16 = einsum(equation = aw_301_equation_0, values = (var_2311_cast_fp16_6, var_2293_cast_fp16_6))[name = tensor<string, []>("aw_301_cast_fp16")];
+            tensor<string, []> aw_303_equation_0 = const()[name = tensor<string, []>("aw_303_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_303_cast_fp16 = einsum(equation = aw_303_equation_0, values = (var_2311_cast_fp16_7, var_2293_cast_fp16_7))[name = tensor<string, []>("aw_303_cast_fp16")];
+            tensor<string, []> aw_305_equation_0 = const()[name = tensor<string, []>("aw_305_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_305_cast_fp16 = einsum(equation = aw_305_equation_0, values = (var_2311_cast_fp16_8, var_2293_cast_fp16_8))[name = tensor<string, []>("aw_305_cast_fp16")];
+            tensor<string, []> aw_307_equation_0 = const()[name = tensor<string, []>("aw_307_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_307_cast_fp16 = einsum(equation = aw_307_equation_0, values = (var_2311_cast_fp16_9, var_2293_cast_fp16_9))[name = tensor<string, []>("aw_307_cast_fp16")];
+            tensor<string, []> aw_309_equation_0 = const()[name = tensor<string, []>("aw_309_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_309_cast_fp16 = einsum(equation = aw_309_equation_0, values = (var_2311_cast_fp16_10, var_2293_cast_fp16_10))[name = tensor<string, []>("aw_309_cast_fp16")];
+            tensor<string, []> aw_311_equation_0 = const()[name = tensor<string, []>("aw_311_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_311_cast_fp16 = einsum(equation = aw_311_equation_0, values = (var_2311_cast_fp16_11, var_2293_cast_fp16_11))[name = tensor<string, []>("aw_311_cast_fp16")];
+            tensor<string, []> aw_313_equation_0 = const()[name = tensor<string, []>("aw_313_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_313_cast_fp16 = einsum(equation = aw_313_equation_0, values = (var_2311_cast_fp16_12, var_2293_cast_fp16_12))[name = tensor<string, []>("aw_313_cast_fp16")];
+            tensor<string, []> aw_315_equation_0 = const()[name = tensor<string, []>("aw_315_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_315_cast_fp16 = einsum(equation = aw_315_equation_0, values = (var_2311_cast_fp16_13, var_2293_cast_fp16_13))[name = tensor<string, []>("aw_315_cast_fp16")];
+            tensor<string, []> aw_317_equation_0 = const()[name = tensor<string, []>("aw_317_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_317_cast_fp16 = einsum(equation = aw_317_equation_0, values = (var_2311_cast_fp16_14, var_2293_cast_fp16_14))[name = tensor<string, []>("aw_317_cast_fp16")];
+            tensor<string, []> aw_319_equation_0 = const()[name = tensor<string, []>("aw_319_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_319_cast_fp16 = einsum(equation = aw_319_equation_0, values = (var_2311_cast_fp16_15, var_2293_cast_fp16_15))[name = tensor<string, []>("aw_319_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2377_cast_fp16 = softmax(axis = var_2241, x = aw_289_cast_fp16)[name = tensor<string, []>("op_2377_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2378_cast_fp16 = softmax(axis = var_2241, x = aw_291_cast_fp16)[name = tensor<string, []>("op_2378_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2379_cast_fp16 = softmax(axis = var_2241, x = aw_293_cast_fp16)[name = tensor<string, []>("op_2379_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2380_cast_fp16 = softmax(axis = var_2241, x = aw_295_cast_fp16)[name = tensor<string, []>("op_2380_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2381_cast_fp16 = softmax(axis = var_2241, x = aw_297_cast_fp16)[name = tensor<string, []>("op_2381_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2382_cast_fp16 = softmax(axis = var_2241, x = aw_299_cast_fp16)[name = tensor<string, []>("op_2382_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2383_cast_fp16 = softmax(axis = var_2241, x = aw_301_cast_fp16)[name = tensor<string, []>("op_2383_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2384_cast_fp16 = softmax(axis = var_2241, x = aw_303_cast_fp16)[name = tensor<string, []>("op_2384_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2385_cast_fp16 = softmax(axis = var_2241, x = aw_305_cast_fp16)[name = tensor<string, []>("op_2385_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2386_cast_fp16 = softmax(axis = var_2241, x = aw_307_cast_fp16)[name = tensor<string, []>("op_2386_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2387_cast_fp16 = softmax(axis = var_2241, x = aw_309_cast_fp16)[name = tensor<string, []>("op_2387_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2388_cast_fp16 = softmax(axis = var_2241, x = aw_311_cast_fp16)[name = tensor<string, []>("op_2388_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2389_cast_fp16 = softmax(axis = var_2241, x = aw_313_cast_fp16)[name = tensor<string, []>("op_2389_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2390_cast_fp16 = softmax(axis = var_2241, x = aw_315_cast_fp16)[name = tensor<string, []>("op_2390_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2391_cast_fp16 = softmax(axis = var_2241, x = aw_317_cast_fp16)[name = tensor<string, []>("op_2391_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2392_cast_fp16 = softmax(axis = var_2241, x = aw_319_cast_fp16)[name = tensor<string, []>("op_2392_cast_fp16")];
+            tensor<string, []> var_2394_equation_0 = const()[name = tensor<string, []>("op_2394_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2394_cast_fp16 = einsum(equation = var_2394_equation_0, values = (var_2328_cast_fp16_0, var_2377_cast_fp16))[name = tensor<string, []>("op_2394_cast_fp16")];
+            tensor<string, []> var_2396_equation_0 = const()[name = tensor<string, []>("op_2396_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2396_cast_fp16 = einsum(equation = var_2396_equation_0, values = (var_2328_cast_fp16_1, var_2378_cast_fp16))[name = tensor<string, []>("op_2396_cast_fp16")];
+            tensor<string, []> var_2398_equation_0 = const()[name = tensor<string, []>("op_2398_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2398_cast_fp16 = einsum(equation = var_2398_equation_0, values = (var_2328_cast_fp16_2, var_2379_cast_fp16))[name = tensor<string, []>("op_2398_cast_fp16")];
+            tensor<string, []> var_2400_equation_0 = const()[name = tensor<string, []>("op_2400_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2400_cast_fp16 = einsum(equation = var_2400_equation_0, values = (var_2328_cast_fp16_3, var_2380_cast_fp16))[name = tensor<string, []>("op_2400_cast_fp16")];
+            tensor<string, []> var_2402_equation_0 = const()[name = tensor<string, []>("op_2402_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2402_cast_fp16 = einsum(equation = var_2402_equation_0, values = (var_2328_cast_fp16_4, var_2381_cast_fp16))[name = tensor<string, []>("op_2402_cast_fp16")];
+            tensor<string, []> var_2404_equation_0 = const()[name = tensor<string, []>("op_2404_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2404_cast_fp16 = einsum(equation = var_2404_equation_0, values = (var_2328_cast_fp16_5, var_2382_cast_fp16))[name = tensor<string, []>("op_2404_cast_fp16")];
+            tensor<string, []> var_2406_equation_0 = const()[name = tensor<string, []>("op_2406_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2406_cast_fp16 = einsum(equation = var_2406_equation_0, values = (var_2328_cast_fp16_6, var_2383_cast_fp16))[name = tensor<string, []>("op_2406_cast_fp16")];
+            tensor<string, []> var_2408_equation_0 = const()[name = tensor<string, []>("op_2408_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2408_cast_fp16 = einsum(equation = var_2408_equation_0, values = (var_2328_cast_fp16_7, var_2384_cast_fp16))[name = tensor<string, []>("op_2408_cast_fp16")];
+            tensor<string, []> var_2410_equation_0 = const()[name = tensor<string, []>("op_2410_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2410_cast_fp16 = einsum(equation = var_2410_equation_0, values = (var_2328_cast_fp16_8, var_2385_cast_fp16))[name = tensor<string, []>("op_2410_cast_fp16")];
+            tensor<string, []> var_2412_equation_0 = const()[name = tensor<string, []>("op_2412_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2412_cast_fp16 = einsum(equation = var_2412_equation_0, values = (var_2328_cast_fp16_9, var_2386_cast_fp16))[name = tensor<string, []>("op_2412_cast_fp16")];
+            tensor<string, []> var_2414_equation_0 = const()[name = tensor<string, []>("op_2414_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2414_cast_fp16 = einsum(equation = var_2414_equation_0, values = (var_2328_cast_fp16_10, var_2387_cast_fp16))[name = tensor<string, []>("op_2414_cast_fp16")];
+            tensor<string, []> var_2416_equation_0 = const()[name = tensor<string, []>("op_2416_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2416_cast_fp16 = einsum(equation = var_2416_equation_0, values = (var_2328_cast_fp16_11, var_2388_cast_fp16))[name = tensor<string, []>("op_2416_cast_fp16")];
+            tensor<string, []> var_2418_equation_0 = const()[name = tensor<string, []>("op_2418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16 = einsum(equation = var_2418_equation_0, values = (var_2328_cast_fp16_12, var_2389_cast_fp16))[name = tensor<string, []>("op_2418_cast_fp16")];
+            tensor<string, []> var_2420_equation_0 = const()[name = tensor<string, []>("op_2420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2420_cast_fp16 = einsum(equation = var_2420_equation_0, values = (var_2328_cast_fp16_13, var_2390_cast_fp16))[name = tensor<string, []>("op_2420_cast_fp16")];
+            tensor<string, []> var_2422_equation_0 = const()[name = tensor<string, []>("op_2422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2422_cast_fp16 = einsum(equation = var_2422_equation_0, values = (var_2328_cast_fp16_14, var_2391_cast_fp16))[name = tensor<string, []>("op_2422_cast_fp16")];
+            tensor<string, []> var_2424_equation_0 = const()[name = tensor<string, []>("op_2424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2424_cast_fp16 = einsum(equation = var_2424_equation_0, values = (var_2328_cast_fp16_15, var_2392_cast_fp16))[name = tensor<string, []>("op_2424_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_95_cast_fp16 = concat(axis = var_2241, interleave = input_95_interleave_0, values = (var_2394_cast_fp16, var_2396_cast_fp16, var_2398_cast_fp16, var_2400_cast_fp16, var_2402_cast_fp16, var_2404_cast_fp16, var_2406_cast_fp16, var_2408_cast_fp16, var_2410_cast_fp16, var_2412_cast_fp16, var_2414_cast_fp16, var_2416_cast_fp16, var_2418_cast_fp16, var_2420_cast_fp16, var_2422_cast_fp16, var_2424_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2433_pad_type_0 = const()[name = tensor<string, []>("op_2433_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2433_strides_0 = const()[name = tensor<string, []>("op_2433_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2433_pad_0 = const()[name = tensor<string, []>("op_2433_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2433_dilations_0 = const()[name = tensor<string, []>("op_2433_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2433_groups_0 = const()[name = tensor<string, []>("op_2433_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242881792)))];
+            tensor<fp16, [1024]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244979008)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2433_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2433_dilations_0, groups = var_2433_groups_0, pad = var_2433_pad_0, pad_type = var_2433_pad_type_0, strides = var_2433_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2433_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2433_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244981120)))];
+            tensor<fp16, [1024]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244983232)))];
+            tensor<fp16, []> var_2443_to_fp16 = const()[name = tensor<string, []>("op_2443_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2443_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244985344)))];
+            tensor<fp16, [4096]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253374016)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2469_pad_type_0 = const()[name = tensor<string, []>("op_2469_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2469_strides_0 = const()[name = tensor<string, []>("op_2469_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2469_pad_0 = const()[name = tensor<string, []>("op_2469_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2469_dilations_0 = const()[name = tensor<string, []>("op_2469_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2469_groups_0 = const()[name = tensor<string, []>("op_2469_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(253382272)))];
+            tensor<fp16, [1024]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261770944)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2469_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2469_dilations_0, groups = var_2469_groups_0, pad = var_2469_pad_0, pad_type = var_2469_pad_type_0, strides = var_2469_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2469_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2469_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2478 = const()[name = tensor<string, []>("op_2478"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261773056)))];
+            tensor<fp16, [1024]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261775168)))];
+            tensor<fp16, []> var_2494_to_fp16 = const()[name = tensor<string, []>("op_2494_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2494_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2529_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2529_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(261777280)))];
+            tensor<fp16, [1024]> var_2529_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2529_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263874496)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2529_cast_fp16 = conv(bias = var_2529_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2529_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2529_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263876608)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2527_pad_type_0 = const()[name = tensor<string, []>("op_2527_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2527_strides_0 = const()[name = tensor<string, []>("op_2527_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2527_pad_0 = const()[name = tensor<string, []>("op_2527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2527_dilations_0 = const()[name = tensor<string, []>("op_2527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2527_groups_0 = const()[name = tensor<string, []>("op_2527_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(265973824)))];
+            tensor<fp16, [1024]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268071040)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2527_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2527_cast_fp16")];
+            tensor<int32, [16]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2530_axis_0 = const()[name = tensor<string, []>("op_2530_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2530_cast_fp16_15 = split(axis = var_2530_axis_0, split_sizes = tile_30, x = var_2529_cast_fp16)[name = tensor<string, []>("op_2530_cast_fp16")];
+            tensor<int32, [4]> var_2547_perm_0 = const()[name = tensor<string, []>("op_2547_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2548_axis_0 = const()[name = tensor<string, []>("op_2548_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2547_cast_fp16 = transpose(perm = var_2547_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_14")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2548_cast_fp16_15 = split(axis = var_2548_axis_0, split_sizes = tile_31, x = var_2547_cast_fp16)[name = tensor<string, []>("op_2548_cast_fp16")];
+            tensor<int32, [16]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2565_axis_0 = const()[name = tensor<string, []>("op_2565_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2565_cast_fp16_15 = split(axis = var_2565_axis_0, split_sizes = tile_32, x = var_2527_cast_fp16)[name = tensor<string, []>("op_2565_cast_fp16")];
+            tensor<string, []> aw_321_equation_0 = const()[name = tensor<string, []>("aw_321_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_321_cast_fp16 = einsum(equation = aw_321_equation_0, values = (var_2548_cast_fp16_0, var_2530_cast_fp16_0))[name = tensor<string, []>("aw_321_cast_fp16")];
+            tensor<string, []> aw_323_equation_0 = const()[name = tensor<string, []>("aw_323_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_323_cast_fp16 = einsum(equation = aw_323_equation_0, values = (var_2548_cast_fp16_1, var_2530_cast_fp16_1))[name = tensor<string, []>("aw_323_cast_fp16")];
+            tensor<string, []> aw_325_equation_0 = const()[name = tensor<string, []>("aw_325_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_325_cast_fp16 = einsum(equation = aw_325_equation_0, values = (var_2548_cast_fp16_2, var_2530_cast_fp16_2))[name = tensor<string, []>("aw_325_cast_fp16")];
+            tensor<string, []> aw_327_equation_0 = const()[name = tensor<string, []>("aw_327_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_327_cast_fp16 = einsum(equation = aw_327_equation_0, values = (var_2548_cast_fp16_3, var_2530_cast_fp16_3))[name = tensor<string, []>("aw_327_cast_fp16")];
+            tensor<string, []> aw_329_equation_0 = const()[name = tensor<string, []>("aw_329_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_329_cast_fp16 = einsum(equation = aw_329_equation_0, values = (var_2548_cast_fp16_4, var_2530_cast_fp16_4))[name = tensor<string, []>("aw_329_cast_fp16")];
+            tensor<string, []> aw_331_equation_0 = const()[name = tensor<string, []>("aw_331_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_331_cast_fp16 = einsum(equation = aw_331_equation_0, values = (var_2548_cast_fp16_5, var_2530_cast_fp16_5))[name = tensor<string, []>("aw_331_cast_fp16")];
+            tensor<string, []> aw_333_equation_0 = const()[name = tensor<string, []>("aw_333_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_333_cast_fp16 = einsum(equation = aw_333_equation_0, values = (var_2548_cast_fp16_6, var_2530_cast_fp16_6))[name = tensor<string, []>("aw_333_cast_fp16")];
+            tensor<string, []> aw_335_equation_0 = const()[name = tensor<string, []>("aw_335_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_335_cast_fp16 = einsum(equation = aw_335_equation_0, values = (var_2548_cast_fp16_7, var_2530_cast_fp16_7))[name = tensor<string, []>("aw_335_cast_fp16")];
+            tensor<string, []> aw_337_equation_0 = const()[name = tensor<string, []>("aw_337_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_337_cast_fp16 = einsum(equation = aw_337_equation_0, values = (var_2548_cast_fp16_8, var_2530_cast_fp16_8))[name = tensor<string, []>("aw_337_cast_fp16")];
+            tensor<string, []> aw_339_equation_0 = const()[name = tensor<string, []>("aw_339_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_339_cast_fp16 = einsum(equation = aw_339_equation_0, values = (var_2548_cast_fp16_9, var_2530_cast_fp16_9))[name = tensor<string, []>("aw_339_cast_fp16")];
+            tensor<string, []> aw_341_equation_0 = const()[name = tensor<string, []>("aw_341_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_341_cast_fp16 = einsum(equation = aw_341_equation_0, values = (var_2548_cast_fp16_10, var_2530_cast_fp16_10))[name = tensor<string, []>("aw_341_cast_fp16")];
+            tensor<string, []> aw_343_equation_0 = const()[name = tensor<string, []>("aw_343_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_343_cast_fp16 = einsum(equation = aw_343_equation_0, values = (var_2548_cast_fp16_11, var_2530_cast_fp16_11))[name = tensor<string, []>("aw_343_cast_fp16")];
+            tensor<string, []> aw_345_equation_0 = const()[name = tensor<string, []>("aw_345_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_345_cast_fp16 = einsum(equation = aw_345_equation_0, values = (var_2548_cast_fp16_12, var_2530_cast_fp16_12))[name = tensor<string, []>("aw_345_cast_fp16")];
+            tensor<string, []> aw_347_equation_0 = const()[name = tensor<string, []>("aw_347_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_347_cast_fp16 = einsum(equation = aw_347_equation_0, values = (var_2548_cast_fp16_13, var_2530_cast_fp16_13))[name = tensor<string, []>("aw_347_cast_fp16")];
+            tensor<string, []> aw_349_equation_0 = const()[name = tensor<string, []>("aw_349_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_349_cast_fp16 = einsum(equation = aw_349_equation_0, values = (var_2548_cast_fp16_14, var_2530_cast_fp16_14))[name = tensor<string, []>("aw_349_cast_fp16")];
+            tensor<string, []> aw_351_equation_0 = const()[name = tensor<string, []>("aw_351_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_351_cast_fp16 = einsum(equation = aw_351_equation_0, values = (var_2548_cast_fp16_15, var_2530_cast_fp16_15))[name = tensor<string, []>("aw_351_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2614_cast_fp16 = softmax(axis = var_2478, x = aw_321_cast_fp16)[name = tensor<string, []>("op_2614_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2615_cast_fp16 = softmax(axis = var_2478, x = aw_323_cast_fp16)[name = tensor<string, []>("op_2615_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2616_cast_fp16 = softmax(axis = var_2478, x = aw_325_cast_fp16)[name = tensor<string, []>("op_2616_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2617_cast_fp16 = softmax(axis = var_2478, x = aw_327_cast_fp16)[name = tensor<string, []>("op_2617_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2618_cast_fp16 = softmax(axis = var_2478, x = aw_329_cast_fp16)[name = tensor<string, []>("op_2618_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2619_cast_fp16 = softmax(axis = var_2478, x = aw_331_cast_fp16)[name = tensor<string, []>("op_2619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2620_cast_fp16 = softmax(axis = var_2478, x = aw_333_cast_fp16)[name = tensor<string, []>("op_2620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2621_cast_fp16 = softmax(axis = var_2478, x = aw_335_cast_fp16)[name = tensor<string, []>("op_2621_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2622_cast_fp16 = softmax(axis = var_2478, x = aw_337_cast_fp16)[name = tensor<string, []>("op_2622_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2623_cast_fp16 = softmax(axis = var_2478, x = aw_339_cast_fp16)[name = tensor<string, []>("op_2623_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2624_cast_fp16 = softmax(axis = var_2478, x = aw_341_cast_fp16)[name = tensor<string, []>("op_2624_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2625_cast_fp16 = softmax(axis = var_2478, x = aw_343_cast_fp16)[name = tensor<string, []>("op_2625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2626_cast_fp16 = softmax(axis = var_2478, x = aw_345_cast_fp16)[name = tensor<string, []>("op_2626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2627_cast_fp16 = softmax(axis = var_2478, x = aw_347_cast_fp16)[name = tensor<string, []>("op_2627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2628_cast_fp16 = softmax(axis = var_2478, x = aw_349_cast_fp16)[name = tensor<string, []>("op_2628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2629_cast_fp16 = softmax(axis = var_2478, x = aw_351_cast_fp16)[name = tensor<string, []>("op_2629_cast_fp16")];
+            tensor<string, []> var_2631_equation_0 = const()[name = tensor<string, []>("op_2631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2631_cast_fp16 = einsum(equation = var_2631_equation_0, values = (var_2565_cast_fp16_0, var_2614_cast_fp16))[name = tensor<string, []>("op_2631_cast_fp16")];
+            tensor<string, []> var_2633_equation_0 = const()[name = tensor<string, []>("op_2633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2633_cast_fp16 = einsum(equation = var_2633_equation_0, values = (var_2565_cast_fp16_1, var_2615_cast_fp16))[name = tensor<string, []>("op_2633_cast_fp16")];
+            tensor<string, []> var_2635_equation_0 = const()[name = tensor<string, []>("op_2635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2635_cast_fp16 = einsum(equation = var_2635_equation_0, values = (var_2565_cast_fp16_2, var_2616_cast_fp16))[name = tensor<string, []>("op_2635_cast_fp16")];
+            tensor<string, []> var_2637_equation_0 = const()[name = tensor<string, []>("op_2637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2637_cast_fp16 = einsum(equation = var_2637_equation_0, values = (var_2565_cast_fp16_3, var_2617_cast_fp16))[name = tensor<string, []>("op_2637_cast_fp16")];
+            tensor<string, []> var_2639_equation_0 = const()[name = tensor<string, []>("op_2639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2639_cast_fp16 = einsum(equation = var_2639_equation_0, values = (var_2565_cast_fp16_4, var_2618_cast_fp16))[name = tensor<string, []>("op_2639_cast_fp16")];
+            tensor<string, []> var_2641_equation_0 = const()[name = tensor<string, []>("op_2641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2641_cast_fp16 = einsum(equation = var_2641_equation_0, values = (var_2565_cast_fp16_5, var_2619_cast_fp16))[name = tensor<string, []>("op_2641_cast_fp16")];
+            tensor<string, []> var_2643_equation_0 = const()[name = tensor<string, []>("op_2643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2643_cast_fp16 = einsum(equation = var_2643_equation_0, values = (var_2565_cast_fp16_6, var_2620_cast_fp16))[name = tensor<string, []>("op_2643_cast_fp16")];
+            tensor<string, []> var_2645_equation_0 = const()[name = tensor<string, []>("op_2645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2645_cast_fp16 = einsum(equation = var_2645_equation_0, values = (var_2565_cast_fp16_7, var_2621_cast_fp16))[name = tensor<string, []>("op_2645_cast_fp16")];
+            tensor<string, []> var_2647_equation_0 = const()[name = tensor<string, []>("op_2647_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2647_cast_fp16 = einsum(equation = var_2647_equation_0, values = (var_2565_cast_fp16_8, var_2622_cast_fp16))[name = tensor<string, []>("op_2647_cast_fp16")];
+            tensor<string, []> var_2649_equation_0 = const()[name = tensor<string, []>("op_2649_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2649_cast_fp16 = einsum(equation = var_2649_equation_0, values = (var_2565_cast_fp16_9, var_2623_cast_fp16))[name = tensor<string, []>("op_2649_cast_fp16")];
+            tensor<string, []> var_2651_equation_0 = const()[name = tensor<string, []>("op_2651_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2651_cast_fp16 = einsum(equation = var_2651_equation_0, values = (var_2565_cast_fp16_10, var_2624_cast_fp16))[name = tensor<string, []>("op_2651_cast_fp16")];
+            tensor<string, []> var_2653_equation_0 = const()[name = tensor<string, []>("op_2653_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2653_cast_fp16 = einsum(equation = var_2653_equation_0, values = (var_2565_cast_fp16_11, var_2625_cast_fp16))[name = tensor<string, []>("op_2653_cast_fp16")];
+            tensor<string, []> var_2655_equation_0 = const()[name = tensor<string, []>("op_2655_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2655_cast_fp16 = einsum(equation = var_2655_equation_0, values = (var_2565_cast_fp16_12, var_2626_cast_fp16))[name = tensor<string, []>("op_2655_cast_fp16")];
+            tensor<string, []> var_2657_equation_0 = const()[name = tensor<string, []>("op_2657_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2657_cast_fp16 = einsum(equation = var_2657_equation_0, values = (var_2565_cast_fp16_13, var_2627_cast_fp16))[name = tensor<string, []>("op_2657_cast_fp16")];
+            tensor<string, []> var_2659_equation_0 = const()[name = tensor<string, []>("op_2659_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2659_cast_fp16 = einsum(equation = var_2659_equation_0, values = (var_2565_cast_fp16_14, var_2628_cast_fp16))[name = tensor<string, []>("op_2659_cast_fp16")];
+            tensor<string, []> var_2661_equation_0 = const()[name = tensor<string, []>("op_2661_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2661_cast_fp16 = einsum(equation = var_2661_equation_0, values = (var_2565_cast_fp16_15, var_2629_cast_fp16))[name = tensor<string, []>("op_2661_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2478, interleave = input_105_interleave_0, values = (var_2631_cast_fp16, var_2633_cast_fp16, var_2635_cast_fp16, var_2637_cast_fp16, var_2639_cast_fp16, var_2641_cast_fp16, var_2643_cast_fp16, var_2645_cast_fp16, var_2647_cast_fp16, var_2649_cast_fp16, var_2651_cast_fp16, var_2653_cast_fp16, var_2655_cast_fp16, var_2657_cast_fp16, var_2659_cast_fp16, var_2661_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_2670_pad_type_0 = const()[name = tensor<string, []>("op_2670_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2670_strides_0 = const()[name = tensor<string, []>("op_2670_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2670_pad_0 = const()[name = tensor<string, []>("op_2670_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2670_dilations_0 = const()[name = tensor<string, []>("op_2670_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2670_groups_0 = const()[name = tensor<string, []>("op_2670_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268073152)))];
+            tensor<fp16, [1024]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270170368)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2670_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_2670_dilations_0, groups = var_2670_groups_0, pad = var_2670_pad_0, pad_type = var_2670_pad_type_0, strides = var_2670_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_2670_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_2670_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270172480)))];
+            tensor<fp16, [1024]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270174592)))];
+            tensor<fp16, []> var_2680_to_fp16 = const()[name = tensor<string, []>("op_2680_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_2680_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270176704)))];
+            tensor<fp16, [4096]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278565376)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_2706_pad_type_0 = const()[name = tensor<string, []>("op_2706_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2706_strides_0 = const()[name = tensor<string, []>("op_2706_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2706_pad_0 = const()[name = tensor<string, []>("op_2706_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2706_dilations_0 = const()[name = tensor<string, []>("op_2706_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2706_groups_0 = const()[name = tensor<string, []>("op_2706_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(278573632)))];
+            tensor<fp16, [1024]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286962304)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2706_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_2706_dilations_0, groups = var_2706_groups_0, pad = var_2706_pad_0, pad_type = var_2706_pad_type_0, strides = var_2706_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_2706_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_2715 = const()[name = tensor<string, []>("op_2715"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286964416)))];
+            tensor<fp16, [1024]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286966528)))];
+            tensor<fp16, []> var_2731_to_fp16 = const()[name = tensor<string, []>("op_2731_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_2731_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_23_pad_type_0 = const()[name = tensor<string, []>("q_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_23_strides_0 = const()[name = tensor<string, []>("q_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_23_pad_0 = const()[name = tensor<string, []>("q_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_23_dilations_0 = const()[name = tensor<string, []>("q_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_23_groups_0 = const()[name = tensor<string, []>("q_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_2766_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2766_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(286968640)))];
+            tensor<fp16, [1024]> var_2766_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2766_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289065856)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2766_cast_fp16 = conv(bias = var_2766_bias_0_to_fp16, dilations = q_23_dilations_0, groups = q_23_groups_0, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = q_23_strides_0, weight = var_2766_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2766_cast_fp16")];
+            tensor<string, []> k_23_pad_type_0 = const()[name = tensor<string, []>("k_23_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_23_strides_0 = const()[name = tensor<string, []>("k_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_23_pad_0 = const()[name = tensor<string, []>("k_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_23_dilations_0 = const()[name = tensor<string, []>("k_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_23_groups_0 = const()[name = tensor<string, []>("k_23_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(289067968)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_23_cast_fp16 = conv(dilations = k_23_dilations_0, groups = k_23_groups_0, pad = k_23_pad_0, pad_type = k_23_pad_type_0, strides = k_23_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_23_cast_fp16")];
+            tensor<string, []> var_2764_pad_type_0 = const()[name = tensor<string, []>("op_2764_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2764_strides_0 = const()[name = tensor<string, []>("op_2764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = tensor<string, []>("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2764_dilations_0 = const()[name = tensor<string, []>("op_2764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2764_groups_0 = const()[name = tensor<string, []>("op_2764_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(291165184)))];
+            tensor<fp16, [1024]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293262400)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2764_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2764_cast_fp16")];
+            tensor<int32, [16]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2767_axis_0 = const()[name = tensor<string, []>("op_2767_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2767_cast_fp16_15 = split(axis = var_2767_axis_0, split_sizes = tile_33, x = var_2766_cast_fp16)[name = tensor<string, []>("op_2767_cast_fp16")];
+            tensor<int32, [4]> var_2784_perm_0 = const()[name = tensor<string, []>("op_2784_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2785_axis_0 = const()[name = tensor<string, []>("op_2785_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_2784_cast_fp16 = transpose(perm = var_2784_perm_0, x = k_23_cast_fp16)[name = tensor<string, []>("transpose_13")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_2785_cast_fp16_15 = split(axis = var_2785_axis_0, split_sizes = tile_34, x = var_2784_cast_fp16)[name = tensor<string, []>("op_2785_cast_fp16")];
+            tensor<int32, [16]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2802_axis_0 = const()[name = tensor<string, []>("op_2802_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_2802_cast_fp16_15 = split(axis = var_2802_axis_0, split_sizes = tile_35, x = var_2764_cast_fp16)[name = tensor<string, []>("op_2802_cast_fp16")];
+            tensor<string, []> aw_353_equation_0 = const()[name = tensor<string, []>("aw_353_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_353_cast_fp16 = einsum(equation = aw_353_equation_0, values = (var_2785_cast_fp16_0, var_2767_cast_fp16_0))[name = tensor<string, []>("aw_353_cast_fp16")];
+            tensor<string, []> aw_355_equation_0 = const()[name = tensor<string, []>("aw_355_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_355_cast_fp16 = einsum(equation = aw_355_equation_0, values = (var_2785_cast_fp16_1, var_2767_cast_fp16_1))[name = tensor<string, []>("aw_355_cast_fp16")];
+            tensor<string, []> aw_357_equation_0 = const()[name = tensor<string, []>("aw_357_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_357_cast_fp16 = einsum(equation = aw_357_equation_0, values = (var_2785_cast_fp16_2, var_2767_cast_fp16_2))[name = tensor<string, []>("aw_357_cast_fp16")];
+            tensor<string, []> aw_359_equation_0 = const()[name = tensor<string, []>("aw_359_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_359_cast_fp16 = einsum(equation = aw_359_equation_0, values = (var_2785_cast_fp16_3, var_2767_cast_fp16_3))[name = tensor<string, []>("aw_359_cast_fp16")];
+            tensor<string, []> aw_361_equation_0 = const()[name = tensor<string, []>("aw_361_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_361_cast_fp16 = einsum(equation = aw_361_equation_0, values = (var_2785_cast_fp16_4, var_2767_cast_fp16_4))[name = tensor<string, []>("aw_361_cast_fp16")];
+            tensor<string, []> aw_363_equation_0 = const()[name = tensor<string, []>("aw_363_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_363_cast_fp16 = einsum(equation = aw_363_equation_0, values = (var_2785_cast_fp16_5, var_2767_cast_fp16_5))[name = tensor<string, []>("aw_363_cast_fp16")];
+            tensor<string, []> aw_365_equation_0 = const()[name = tensor<string, []>("aw_365_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_365_cast_fp16 = einsum(equation = aw_365_equation_0, values = (var_2785_cast_fp16_6, var_2767_cast_fp16_6))[name = tensor<string, []>("aw_365_cast_fp16")];
+            tensor<string, []> aw_367_equation_0 = const()[name = tensor<string, []>("aw_367_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_367_cast_fp16 = einsum(equation = aw_367_equation_0, values = (var_2785_cast_fp16_7, var_2767_cast_fp16_7))[name = tensor<string, []>("aw_367_cast_fp16")];
+            tensor<string, []> aw_369_equation_0 = const()[name = tensor<string, []>("aw_369_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_369_cast_fp16 = einsum(equation = aw_369_equation_0, values = (var_2785_cast_fp16_8, var_2767_cast_fp16_8))[name = tensor<string, []>("aw_369_cast_fp16")];
+            tensor<string, []> aw_371_equation_0 = const()[name = tensor<string, []>("aw_371_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_371_cast_fp16 = einsum(equation = aw_371_equation_0, values = (var_2785_cast_fp16_9, var_2767_cast_fp16_9))[name = tensor<string, []>("aw_371_cast_fp16")];
+            tensor<string, []> aw_373_equation_0 = const()[name = tensor<string, []>("aw_373_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_373_cast_fp16 = einsum(equation = aw_373_equation_0, values = (var_2785_cast_fp16_10, var_2767_cast_fp16_10))[name = tensor<string, []>("aw_373_cast_fp16")];
+            tensor<string, []> aw_375_equation_0 = const()[name = tensor<string, []>("aw_375_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_375_cast_fp16 = einsum(equation = aw_375_equation_0, values = (var_2785_cast_fp16_11, var_2767_cast_fp16_11))[name = tensor<string, []>("aw_375_cast_fp16")];
+            tensor<string, []> aw_377_equation_0 = const()[name = tensor<string, []>("aw_377_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_377_cast_fp16 = einsum(equation = aw_377_equation_0, values = (var_2785_cast_fp16_12, var_2767_cast_fp16_12))[name = tensor<string, []>("aw_377_cast_fp16")];
+            tensor<string, []> aw_379_equation_0 = const()[name = tensor<string, []>("aw_379_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_379_cast_fp16 = einsum(equation = aw_379_equation_0, values = (var_2785_cast_fp16_13, var_2767_cast_fp16_13))[name = tensor<string, []>("aw_379_cast_fp16")];
+            tensor<string, []> aw_381_equation_0 = const()[name = tensor<string, []>("aw_381_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_381_cast_fp16 = einsum(equation = aw_381_equation_0, values = (var_2785_cast_fp16_14, var_2767_cast_fp16_14))[name = tensor<string, []>("aw_381_cast_fp16")];
+            tensor<string, []> aw_383_equation_0 = const()[name = tensor<string, []>("aw_383_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_383_cast_fp16 = einsum(equation = aw_383_equation_0, values = (var_2785_cast_fp16_15, var_2767_cast_fp16_15))[name = tensor<string, []>("aw_383_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2851_cast_fp16 = softmax(axis = var_2715, x = aw_353_cast_fp16)[name = tensor<string, []>("op_2851_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2852_cast_fp16 = softmax(axis = var_2715, x = aw_355_cast_fp16)[name = tensor<string, []>("op_2852_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2853_cast_fp16 = softmax(axis = var_2715, x = aw_357_cast_fp16)[name = tensor<string, []>("op_2853_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2854_cast_fp16 = softmax(axis = var_2715, x = aw_359_cast_fp16)[name = tensor<string, []>("op_2854_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2855_cast_fp16 = softmax(axis = var_2715, x = aw_361_cast_fp16)[name = tensor<string, []>("op_2855_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2856_cast_fp16 = softmax(axis = var_2715, x = aw_363_cast_fp16)[name = tensor<string, []>("op_2856_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2857_cast_fp16 = softmax(axis = var_2715, x = aw_365_cast_fp16)[name = tensor<string, []>("op_2857_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2858_cast_fp16 = softmax(axis = var_2715, x = aw_367_cast_fp16)[name = tensor<string, []>("op_2858_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2859_cast_fp16 = softmax(axis = var_2715, x = aw_369_cast_fp16)[name = tensor<string, []>("op_2859_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2860_cast_fp16 = softmax(axis = var_2715, x = aw_371_cast_fp16)[name = tensor<string, []>("op_2860_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2861_cast_fp16 = softmax(axis = var_2715, x = aw_373_cast_fp16)[name = tensor<string, []>("op_2861_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2862_cast_fp16 = softmax(axis = var_2715, x = aw_375_cast_fp16)[name = tensor<string, []>("op_2862_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2863_cast_fp16 = softmax(axis = var_2715, x = aw_377_cast_fp16)[name = tensor<string, []>("op_2863_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2864_cast_fp16 = softmax(axis = var_2715, x = aw_379_cast_fp16)[name = tensor<string, []>("op_2864_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2865_cast_fp16 = softmax(axis = var_2715, x = aw_381_cast_fp16)[name = tensor<string, []>("op_2865_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2866_cast_fp16 = softmax(axis = var_2715, x = aw_383_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
+            tensor<string, []> var_2868_equation_0 = const()[name = tensor<string, []>("op_2868_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2868_cast_fp16 = einsum(equation = var_2868_equation_0, values = (var_2802_cast_fp16_0, var_2851_cast_fp16))[name = tensor<string, []>("op_2868_cast_fp16")];
+            tensor<string, []> var_2870_equation_0 = const()[name = tensor<string, []>("op_2870_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2870_cast_fp16 = einsum(equation = var_2870_equation_0, values = (var_2802_cast_fp16_1, var_2852_cast_fp16))[name = tensor<string, []>("op_2870_cast_fp16")];
+            tensor<string, []> var_2872_equation_0 = const()[name = tensor<string, []>("op_2872_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2872_cast_fp16 = einsum(equation = var_2872_equation_0, values = (var_2802_cast_fp16_2, var_2853_cast_fp16))[name = tensor<string, []>("op_2872_cast_fp16")];
+            tensor<string, []> var_2874_equation_0 = const()[name = tensor<string, []>("op_2874_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2874_cast_fp16 = einsum(equation = var_2874_equation_0, values = (var_2802_cast_fp16_3, var_2854_cast_fp16))[name = tensor<string, []>("op_2874_cast_fp16")];
+            tensor<string, []> var_2876_equation_0 = const()[name = tensor<string, []>("op_2876_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2876_cast_fp16 = einsum(equation = var_2876_equation_0, values = (var_2802_cast_fp16_4, var_2855_cast_fp16))[name = tensor<string, []>("op_2876_cast_fp16")];
+            tensor<string, []> var_2878_equation_0 = const()[name = tensor<string, []>("op_2878_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2878_cast_fp16 = einsum(equation = var_2878_equation_0, values = (var_2802_cast_fp16_5, var_2856_cast_fp16))[name = tensor<string, []>("op_2878_cast_fp16")];
+            tensor<string, []> var_2880_equation_0 = const()[name = tensor<string, []>("op_2880_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2880_cast_fp16 = einsum(equation = var_2880_equation_0, values = (var_2802_cast_fp16_6, var_2857_cast_fp16))[name = tensor<string, []>("op_2880_cast_fp16")];
+            tensor<string, []> var_2882_equation_0 = const()[name = tensor<string, []>("op_2882_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2882_cast_fp16 = einsum(equation = var_2882_equation_0, values = (var_2802_cast_fp16_7, var_2858_cast_fp16))[name = tensor<string, []>("op_2882_cast_fp16")];
+            tensor<string, []> var_2884_equation_0 = const()[name = tensor<string, []>("op_2884_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2884_cast_fp16 = einsum(equation = var_2884_equation_0, values = (var_2802_cast_fp16_8, var_2859_cast_fp16))[name = tensor<string, []>("op_2884_cast_fp16")];
+            tensor<string, []> var_2886_equation_0 = const()[name = tensor<string, []>("op_2886_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2886_cast_fp16 = einsum(equation = var_2886_equation_0, values = (var_2802_cast_fp16_9, var_2860_cast_fp16))[name = tensor<string, []>("op_2886_cast_fp16")];
+            tensor<string, []> var_2888_equation_0 = const()[name = tensor<string, []>("op_2888_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2888_cast_fp16 = einsum(equation = var_2888_equation_0, values = (var_2802_cast_fp16_10, var_2861_cast_fp16))[name = tensor<string, []>("op_2888_cast_fp16")];
+            tensor<string, []> var_2890_equation_0 = const()[name = tensor<string, []>("op_2890_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2890_cast_fp16 = einsum(equation = var_2890_equation_0, values = (var_2802_cast_fp16_11, var_2862_cast_fp16))[name = tensor<string, []>("op_2890_cast_fp16")];
+            tensor<string, []> var_2892_equation_0 = const()[name = tensor<string, []>("op_2892_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2892_cast_fp16 = einsum(equation = var_2892_equation_0, values = (var_2802_cast_fp16_12, var_2863_cast_fp16))[name = tensor<string, []>("op_2892_cast_fp16")];
+            tensor<string, []> var_2894_equation_0 = const()[name = tensor<string, []>("op_2894_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2894_cast_fp16 = einsum(equation = var_2894_equation_0, values = (var_2802_cast_fp16_13, var_2864_cast_fp16))[name = tensor<string, []>("op_2894_cast_fp16")];
+            tensor<string, []> var_2896_equation_0 = const()[name = tensor<string, []>("op_2896_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2896_cast_fp16 = einsum(equation = var_2896_equation_0, values = (var_2802_cast_fp16_14, var_2865_cast_fp16))[name = tensor<string, []>("op_2896_cast_fp16")];
+            tensor<string, []> var_2898_equation_0 = const()[name = tensor<string, []>("op_2898_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2898_cast_fp16 = einsum(equation = var_2898_equation_0, values = (var_2802_cast_fp16_15, var_2866_cast_fp16))[name = tensor<string, []>("op_2898_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_115_cast_fp16 = concat(axis = var_2715, interleave = input_115_interleave_0, values = (var_2868_cast_fp16, var_2870_cast_fp16, var_2872_cast_fp16, var_2874_cast_fp16, var_2876_cast_fp16, var_2878_cast_fp16, var_2880_cast_fp16, var_2882_cast_fp16, var_2884_cast_fp16, var_2886_cast_fp16, var_2888_cast_fp16, var_2890_cast_fp16, var_2892_cast_fp16, var_2894_cast_fp16, var_2896_cast_fp16, var_2898_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_2907_pad_type_0 = const()[name = tensor<string, []>("op_2907_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2907_strides_0 = const()[name = tensor<string, []>("op_2907_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2907_pad_0 = const()[name = tensor<string, []>("op_2907_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2907_dilations_0 = const()[name = tensor<string, []>("op_2907_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2907_groups_0 = const()[name = tensor<string, []>("op_2907_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(293264512)))];
+            tensor<fp16, [1024]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295361728)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2907_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_2907_dilations_0, groups = var_2907_groups_0, pad = var_2907_pad_0, pad_type = var_2907_pad_type_0, strides = var_2907_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_2907_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_2907_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295363840)))];
+            tensor<fp16, [1024]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295365952)))];
+            tensor<fp16, []> var_2917_to_fp16 = const()[name = tensor<string, []>("op_2917_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_2917_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(295368064)))];
+            tensor<fp16, [4096]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303756736)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_121_mode_0 = const()[name = tensor<string, []>("input_121_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_121_cast_fp16 = gelu(mode = input_121_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<string, []> var_2943_pad_type_0 = const()[name = tensor<string, []>("op_2943_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2943_strides_0 = const()[name = tensor<string, []>("op_2943_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2943_pad_0 = const()[name = tensor<string, []>("op_2943_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2943_dilations_0 = const()[name = tensor<string, []>("op_2943_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2943_groups_0 = const()[name = tensor<string, []>("op_2943_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(303764992)))];
+            tensor<fp16, [1024]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312153664)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_2943_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_2943_dilations_0, groups = var_2943_groups_0, pad = var_2943_pad_0, pad_type = var_2943_pad_type_0, strides = var_2943_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("op_2943_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_2943_cast_fp16)[name = tensor<string, []>("inputs_49_cast_fp16")];
+            tensor<int32, []> var_2952 = const()[name = tensor<string, []>("op_2952"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_123_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312155776)))];
+            tensor<fp16, [1024]> input_123_beta_0_to_fp16 = const()[name = tensor<string, []>("input_123_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312157888)))];
+            tensor<fp16, []> var_2968_to_fp16 = const()[name = tensor<string, []>("op_2968_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = input_123_beta_0_to_fp16, epsilon = var_2968_to_fp16, gamma = input_123_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
+            tensor<string, []> q_25_pad_type_0 = const()[name = tensor<string, []>("q_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_25_strides_0 = const()[name = tensor<string, []>("q_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_25_pad_0 = const()[name = tensor<string, []>("q_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_25_dilations_0 = const()[name = tensor<string, []>("q_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_25_groups_0 = const()[name = tensor<string, []>("q_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3003_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3003_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(312160000)))];
+            tensor<fp16, [1024]> var_3003_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3003_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(314257216)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3003_cast_fp16 = conv(bias = var_3003_bias_0_to_fp16, dilations = q_25_dilations_0, groups = q_25_groups_0, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = q_25_strides_0, weight = var_3003_weight_0_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3003_cast_fp16")];
+            tensor<string, []> k_25_pad_type_0 = const()[name = tensor<string, []>("k_25_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_25_strides_0 = const()[name = tensor<string, []>("k_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_25_pad_0 = const()[name = tensor<string, []>("k_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_25_dilations_0 = const()[name = tensor<string, []>("k_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_25_groups_0 = const()[name = tensor<string, []>("k_25_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(314259328)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_25_cast_fp16 = conv(dilations = k_25_dilations_0, groups = k_25_groups_0, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = k_25_strides_0, weight = blocks_12_attn_key_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("k_25_cast_fp16")];
+            tensor<string, []> var_3001_pad_type_0 = const()[name = tensor<string, []>("op_3001_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3001_strides_0 = const()[name = tensor<string, []>("op_3001_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3001_pad_0 = const()[name = tensor<string, []>("op_3001_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3001_dilations_0 = const()[name = tensor<string, []>("op_3001_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3001_groups_0 = const()[name = tensor<string, []>("op_3001_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(316356544)))];
+            tensor<fp16, [1024]> blocks_12_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(318453760)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3001_cast_fp16 = conv(bias = blocks_12_attn_value_bias_to_fp16, dilations = var_3001_dilations_0, groups = var_3001_groups_0, pad = var_3001_pad_0, pad_type = var_3001_pad_type_0, strides = var_3001_strides_0, weight = blocks_12_attn_value_weight_to_fp16, x = input_123_cast_fp16)[name = tensor<string, []>("op_3001_cast_fp16")];
+            tensor<int32, [16]> tile_36 = const()[name = tensor<string, []>("tile_36"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3004_axis_0 = const()[name = tensor<string, []>("op_3004_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3004_cast_fp16_15 = split(axis = var_3004_axis_0, split_sizes = tile_36, x = var_3003_cast_fp16)[name = tensor<string, []>("op_3004_cast_fp16")];
+            tensor<int32, [4]> var_3021_perm_0 = const()[name = tensor<string, []>("op_3021_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3022_axis_0 = const()[name = tensor<string, []>("op_3022_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3021_cast_fp16 = transpose(perm = var_3021_perm_0, x = k_25_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3022_cast_fp16_15 = split(axis = var_3022_axis_0, split_sizes = tile_37, x = var_3021_cast_fp16)[name = tensor<string, []>("op_3022_cast_fp16")];
+            tensor<int32, [16]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3039_axis_0 = const()[name = tensor<string, []>("op_3039_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3039_cast_fp16_15 = split(axis = var_3039_axis_0, split_sizes = tile_38, x = var_3001_cast_fp16)[name = tensor<string, []>("op_3039_cast_fp16")];
+            tensor<string, []> aw_385_equation_0 = const()[name = tensor<string, []>("aw_385_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_385_cast_fp16 = einsum(equation = aw_385_equation_0, values = (var_3022_cast_fp16_0, var_3004_cast_fp16_0))[name = tensor<string, []>("aw_385_cast_fp16")];
+            tensor<string, []> aw_387_equation_0 = const()[name = tensor<string, []>("aw_387_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_387_cast_fp16 = einsum(equation = aw_387_equation_0, values = (var_3022_cast_fp16_1, var_3004_cast_fp16_1))[name = tensor<string, []>("aw_387_cast_fp16")];
+            tensor<string, []> aw_389_equation_0 = const()[name = tensor<string, []>("aw_389_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_389_cast_fp16 = einsum(equation = aw_389_equation_0, values = (var_3022_cast_fp16_2, var_3004_cast_fp16_2))[name = tensor<string, []>("aw_389_cast_fp16")];
+            tensor<string, []> aw_391_equation_0 = const()[name = tensor<string, []>("aw_391_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_391_cast_fp16 = einsum(equation = aw_391_equation_0, values = (var_3022_cast_fp16_3, var_3004_cast_fp16_3))[name = tensor<string, []>("aw_391_cast_fp16")];
+            tensor<string, []> aw_393_equation_0 = const()[name = tensor<string, []>("aw_393_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_393_cast_fp16 = einsum(equation = aw_393_equation_0, values = (var_3022_cast_fp16_4, var_3004_cast_fp16_4))[name = tensor<string, []>("aw_393_cast_fp16")];
+            tensor<string, []> aw_395_equation_0 = const()[name = tensor<string, []>("aw_395_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_395_cast_fp16 = einsum(equation = aw_395_equation_0, values = (var_3022_cast_fp16_5, var_3004_cast_fp16_5))[name = tensor<string, []>("aw_395_cast_fp16")];
+            tensor<string, []> aw_397_equation_0 = const()[name = tensor<string, []>("aw_397_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_397_cast_fp16 = einsum(equation = aw_397_equation_0, values = (var_3022_cast_fp16_6, var_3004_cast_fp16_6))[name = tensor<string, []>("aw_397_cast_fp16")];
+            tensor<string, []> aw_399_equation_0 = const()[name = tensor<string, []>("aw_399_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_399_cast_fp16 = einsum(equation = aw_399_equation_0, values = (var_3022_cast_fp16_7, var_3004_cast_fp16_7))[name = tensor<string, []>("aw_399_cast_fp16")];
+            tensor<string, []> aw_401_equation_0 = const()[name = tensor<string, []>("aw_401_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_401_cast_fp16 = einsum(equation = aw_401_equation_0, values = (var_3022_cast_fp16_8, var_3004_cast_fp16_8))[name = tensor<string, []>("aw_401_cast_fp16")];
+            tensor<string, []> aw_403_equation_0 = const()[name = tensor<string, []>("aw_403_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_403_cast_fp16 = einsum(equation = aw_403_equation_0, values = (var_3022_cast_fp16_9, var_3004_cast_fp16_9))[name = tensor<string, []>("aw_403_cast_fp16")];
+            tensor<string, []> aw_405_equation_0 = const()[name = tensor<string, []>("aw_405_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_405_cast_fp16 = einsum(equation = aw_405_equation_0, values = (var_3022_cast_fp16_10, var_3004_cast_fp16_10))[name = tensor<string, []>("aw_405_cast_fp16")];
+            tensor<string, []> aw_407_equation_0 = const()[name = tensor<string, []>("aw_407_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_407_cast_fp16 = einsum(equation = aw_407_equation_0, values = (var_3022_cast_fp16_11, var_3004_cast_fp16_11))[name = tensor<string, []>("aw_407_cast_fp16")];
+            tensor<string, []> aw_409_equation_0 = const()[name = tensor<string, []>("aw_409_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_409_cast_fp16 = einsum(equation = aw_409_equation_0, values = (var_3022_cast_fp16_12, var_3004_cast_fp16_12))[name = tensor<string, []>("aw_409_cast_fp16")];
+            tensor<string, []> aw_411_equation_0 = const()[name = tensor<string, []>("aw_411_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_411_cast_fp16 = einsum(equation = aw_411_equation_0, values = (var_3022_cast_fp16_13, var_3004_cast_fp16_13))[name = tensor<string, []>("aw_411_cast_fp16")];
+            tensor<string, []> aw_413_equation_0 = const()[name = tensor<string, []>("aw_413_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_413_cast_fp16 = einsum(equation = aw_413_equation_0, values = (var_3022_cast_fp16_14, var_3004_cast_fp16_14))[name = tensor<string, []>("aw_413_cast_fp16")];
+            tensor<string, []> aw_415_equation_0 = const()[name = tensor<string, []>("aw_415_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_415_cast_fp16 = einsum(equation = aw_415_equation_0, values = (var_3022_cast_fp16_15, var_3004_cast_fp16_15))[name = tensor<string, []>("aw_415_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3088_cast_fp16 = softmax(axis = var_2952, x = aw_385_cast_fp16)[name = tensor<string, []>("op_3088_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3089_cast_fp16 = softmax(axis = var_2952, x = aw_387_cast_fp16)[name = tensor<string, []>("op_3089_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3090_cast_fp16 = softmax(axis = var_2952, x = aw_389_cast_fp16)[name = tensor<string, []>("op_3090_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3091_cast_fp16 = softmax(axis = var_2952, x = aw_391_cast_fp16)[name = tensor<string, []>("op_3091_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3092_cast_fp16 = softmax(axis = var_2952, x = aw_393_cast_fp16)[name = tensor<string, []>("op_3092_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3093_cast_fp16 = softmax(axis = var_2952, x = aw_395_cast_fp16)[name = tensor<string, []>("op_3093_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3094_cast_fp16 = softmax(axis = var_2952, x = aw_397_cast_fp16)[name = tensor<string, []>("op_3094_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3095_cast_fp16 = softmax(axis = var_2952, x = aw_399_cast_fp16)[name = tensor<string, []>("op_3095_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3096_cast_fp16 = softmax(axis = var_2952, x = aw_401_cast_fp16)[name = tensor<string, []>("op_3096_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3097_cast_fp16 = softmax(axis = var_2952, x = aw_403_cast_fp16)[name = tensor<string, []>("op_3097_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3098_cast_fp16 = softmax(axis = var_2952, x = aw_405_cast_fp16)[name = tensor<string, []>("op_3098_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3099_cast_fp16 = softmax(axis = var_2952, x = aw_407_cast_fp16)[name = tensor<string, []>("op_3099_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3100_cast_fp16 = softmax(axis = var_2952, x = aw_409_cast_fp16)[name = tensor<string, []>("op_3100_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3101_cast_fp16 = softmax(axis = var_2952, x = aw_411_cast_fp16)[name = tensor<string, []>("op_3101_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3102_cast_fp16 = softmax(axis = var_2952, x = aw_413_cast_fp16)[name = tensor<string, []>("op_3102_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3103_cast_fp16 = softmax(axis = var_2952, x = aw_415_cast_fp16)[name = tensor<string, []>("op_3103_cast_fp16")];
+            tensor<string, []> var_3105_equation_0 = const()[name = tensor<string, []>("op_3105_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3105_cast_fp16 = einsum(equation = var_3105_equation_0, values = (var_3039_cast_fp16_0, var_3088_cast_fp16))[name = tensor<string, []>("op_3105_cast_fp16")];
+            tensor<string, []> var_3107_equation_0 = const()[name = tensor<string, []>("op_3107_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3107_cast_fp16 = einsum(equation = var_3107_equation_0, values = (var_3039_cast_fp16_1, var_3089_cast_fp16))[name = tensor<string, []>("op_3107_cast_fp16")];
+            tensor<string, []> var_3109_equation_0 = const()[name = tensor<string, []>("op_3109_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3109_cast_fp16 = einsum(equation = var_3109_equation_0, values = (var_3039_cast_fp16_2, var_3090_cast_fp16))[name = tensor<string, []>("op_3109_cast_fp16")];
+            tensor<string, []> var_3111_equation_0 = const()[name = tensor<string, []>("op_3111_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3111_cast_fp16 = einsum(equation = var_3111_equation_0, values = (var_3039_cast_fp16_3, var_3091_cast_fp16))[name = tensor<string, []>("op_3111_cast_fp16")];
+            tensor<string, []> var_3113_equation_0 = const()[name = tensor<string, []>("op_3113_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3113_cast_fp16 = einsum(equation = var_3113_equation_0, values = (var_3039_cast_fp16_4, var_3092_cast_fp16))[name = tensor<string, []>("op_3113_cast_fp16")];
+            tensor<string, []> var_3115_equation_0 = const()[name = tensor<string, []>("op_3115_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3115_cast_fp16 = einsum(equation = var_3115_equation_0, values = (var_3039_cast_fp16_5, var_3093_cast_fp16))[name = tensor<string, []>("op_3115_cast_fp16")];
+            tensor<string, []> var_3117_equation_0 = const()[name = tensor<string, []>("op_3117_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3117_cast_fp16 = einsum(equation = var_3117_equation_0, values = (var_3039_cast_fp16_6, var_3094_cast_fp16))[name = tensor<string, []>("op_3117_cast_fp16")];
+            tensor<string, []> var_3119_equation_0 = const()[name = tensor<string, []>("op_3119_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3119_cast_fp16 = einsum(equation = var_3119_equation_0, values = (var_3039_cast_fp16_7, var_3095_cast_fp16))[name = tensor<string, []>("op_3119_cast_fp16")];
+            tensor<string, []> var_3121_equation_0 = const()[name = tensor<string, []>("op_3121_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3121_cast_fp16 = einsum(equation = var_3121_equation_0, values = (var_3039_cast_fp16_8, var_3096_cast_fp16))[name = tensor<string, []>("op_3121_cast_fp16")];
+            tensor<string, []> var_3123_equation_0 = const()[name = tensor<string, []>("op_3123_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3123_cast_fp16 = einsum(equation = var_3123_equation_0, values = (var_3039_cast_fp16_9, var_3097_cast_fp16))[name = tensor<string, []>("op_3123_cast_fp16")];
+            tensor<string, []> var_3125_equation_0 = const()[name = tensor<string, []>("op_3125_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3125_cast_fp16 = einsum(equation = var_3125_equation_0, values = (var_3039_cast_fp16_10, var_3098_cast_fp16))[name = tensor<string, []>("op_3125_cast_fp16")];
+            tensor<string, []> var_3127_equation_0 = const()[name = tensor<string, []>("op_3127_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3127_cast_fp16 = einsum(equation = var_3127_equation_0, values = (var_3039_cast_fp16_11, var_3099_cast_fp16))[name = tensor<string, []>("op_3127_cast_fp16")];
+            tensor<string, []> var_3129_equation_0 = const()[name = tensor<string, []>("op_3129_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3129_cast_fp16 = einsum(equation = var_3129_equation_0, values = (var_3039_cast_fp16_12, var_3100_cast_fp16))[name = tensor<string, []>("op_3129_cast_fp16")];
+            tensor<string, []> var_3131_equation_0 = const()[name = tensor<string, []>("op_3131_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3131_cast_fp16 = einsum(equation = var_3131_equation_0, values = (var_3039_cast_fp16_13, var_3101_cast_fp16))[name = tensor<string, []>("op_3131_cast_fp16")];
+            tensor<string, []> var_3133_equation_0 = const()[name = tensor<string, []>("op_3133_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3133_cast_fp16 = einsum(equation = var_3133_equation_0, values = (var_3039_cast_fp16_14, var_3102_cast_fp16))[name = tensor<string, []>("op_3133_cast_fp16")];
+            tensor<string, []> var_3135_equation_0 = const()[name = tensor<string, []>("op_3135_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3135_cast_fp16 = einsum(equation = var_3135_equation_0, values = (var_3039_cast_fp16_15, var_3103_cast_fp16))[name = tensor<string, []>("op_3135_cast_fp16")];
+            tensor<bool, []> input_125_interleave_0 = const()[name = tensor<string, []>("input_125_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_125_cast_fp16 = concat(axis = var_2952, interleave = input_125_interleave_0, values = (var_3105_cast_fp16, var_3107_cast_fp16, var_3109_cast_fp16, var_3111_cast_fp16, var_3113_cast_fp16, var_3115_cast_fp16, var_3117_cast_fp16, var_3119_cast_fp16, var_3121_cast_fp16, var_3123_cast_fp16, var_3125_cast_fp16, var_3127_cast_fp16, var_3129_cast_fp16, var_3131_cast_fp16, var_3133_cast_fp16, var_3135_cast_fp16))[name = tensor<string, []>("input_125_cast_fp16")];
+            tensor<string, []> var_3144_pad_type_0 = const()[name = tensor<string, []>("op_3144_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3144_strides_0 = const()[name = tensor<string, []>("op_3144_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3144_pad_0 = const()[name = tensor<string, []>("op_3144_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3144_dilations_0 = const()[name = tensor<string, []>("op_3144_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3144_groups_0 = const()[name = tensor<string, []>("op_3144_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_12_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(318455872)))];
+            tensor<fp16, [1024]> blocks_12_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320553088)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3144_cast_fp16 = conv(bias = blocks_12_attn_out_bias_to_fp16, dilations = var_3144_dilations_0, groups = var_3144_groups_0, pad = var_3144_pad_0, pad_type = var_3144_pad_type_0, strides = var_3144_strides_0, weight = blocks_12_attn_out_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("op_3144_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = var_3144_cast_fp16)[name = tensor<string, []>("inputs_51_cast_fp16")];
+            tensor<int32, [1]> input_127_axes_0 = const()[name = tensor<string, []>("input_127_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_127_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_127_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320555200)))];
+            tensor<fp16, [1024]> input_127_beta_0_to_fp16 = const()[name = tensor<string, []>("input_127_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320557312)))];
+            tensor<fp16, []> var_3154_to_fp16 = const()[name = tensor<string, []>("op_3154_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_127_cast_fp16 = layer_norm(axes = input_127_axes_0, beta = input_127_beta_0_to_fp16, epsilon = var_3154_to_fp16, gamma = input_127_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
+            tensor<string, []> input_129_pad_type_0 = const()[name = tensor<string, []>("input_129_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_129_strides_0 = const()[name = tensor<string, []>("input_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_129_pad_0 = const()[name = tensor<string, []>("input_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_129_dilations_0 = const()[name = tensor<string, []>("input_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_129_groups_0 = const()[name = tensor<string, []>("input_129_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_12_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(320559424)))];
+            tensor<fp16, [4096]> blocks_12_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(328948096)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_129_cast_fp16 = conv(bias = blocks_12_mlp_0_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = blocks_12_mlp_0_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
+            tensor<string, []> input_131_mode_0 = const()[name = tensor<string, []>("input_131_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_131_cast_fp16 = gelu(mode = input_131_mode_0, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<string, []> var_3180_pad_type_0 = const()[name = tensor<string, []>("op_3180_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3180_strides_0 = const()[name = tensor<string, []>("op_3180_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3180_pad_0 = const()[name = tensor<string, []>("op_3180_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3180_dilations_0 = const()[name = tensor<string, []>("op_3180_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3180_groups_0 = const()[name = tensor<string, []>("op_3180_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_12_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(328956352)))];
+            tensor<fp16, [1024]> blocks_12_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_12_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337345024)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3180_cast_fp16 = conv(bias = blocks_12_mlp_2_bias_to_fp16, dilations = var_3180_dilations_0, groups = var_3180_groups_0, pad = var_3180_pad_0, pad_type = var_3180_pad_type_0, strides = var_3180_strides_0, weight = blocks_12_mlp_2_weight_to_fp16, x = input_131_cast_fp16)[name = tensor<string, []>("op_3180_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = var_3180_cast_fp16)[name = tensor<string, []>("inputs_53_cast_fp16")];
+            tensor<int32, []> var_3189 = const()[name = tensor<string, []>("op_3189"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = tensor<string, []>("input_133_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_133_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_133_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337347136)))];
+            tensor<fp16, [1024]> input_133_beta_0_to_fp16 = const()[name = tensor<string, []>("input_133_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337349248)))];
+            tensor<fp16, []> var_3205_to_fp16 = const()[name = tensor<string, []>("op_3205_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = input_133_beta_0_to_fp16, epsilon = var_3205_to_fp16, gamma = input_133_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<string, []> q_27_pad_type_0 = const()[name = tensor<string, []>("q_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_27_strides_0 = const()[name = tensor<string, []>("q_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_27_pad_0 = const()[name = tensor<string, []>("q_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_27_dilations_0 = const()[name = tensor<string, []>("q_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_27_groups_0 = const()[name = tensor<string, []>("q_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3240_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3240_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(337351360)))];
+            tensor<fp16, [1024]> var_3240_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3240_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339448576)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3240_cast_fp16 = conv(bias = var_3240_bias_0_to_fp16, dilations = q_27_dilations_0, groups = q_27_groups_0, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = q_27_strides_0, weight = var_3240_weight_0_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3240_cast_fp16")];
+            tensor<string, []> k_27_pad_type_0 = const()[name = tensor<string, []>("k_27_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_27_strides_0 = const()[name = tensor<string, []>("k_27_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_27_pad_0 = const()[name = tensor<string, []>("k_27_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_27_dilations_0 = const()[name = tensor<string, []>("k_27_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_27_groups_0 = const()[name = tensor<string, []>("k_27_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(339450688)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_27_cast_fp16 = conv(dilations = k_27_dilations_0, groups = k_27_groups_0, pad = k_27_pad_0, pad_type = k_27_pad_type_0, strides = k_27_strides_0, weight = blocks_13_attn_key_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("k_27_cast_fp16")];
+            tensor<string, []> var_3238_pad_type_0 = const()[name = tensor<string, []>("op_3238_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3238_strides_0 = const()[name = tensor<string, []>("op_3238_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3238_pad_0 = const()[name = tensor<string, []>("op_3238_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3238_dilations_0 = const()[name = tensor<string, []>("op_3238_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3238_groups_0 = const()[name = tensor<string, []>("op_3238_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(341547904)))];
+            tensor<fp16, [1024]> blocks_13_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(343645120)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3238_cast_fp16 = conv(bias = blocks_13_attn_value_bias_to_fp16, dilations = var_3238_dilations_0, groups = var_3238_groups_0, pad = var_3238_pad_0, pad_type = var_3238_pad_type_0, strides = var_3238_strides_0, weight = blocks_13_attn_value_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("op_3238_cast_fp16")];
+            tensor<int32, [16]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3241_axis_0 = const()[name = tensor<string, []>("op_3241_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3241_cast_fp16_15 = split(axis = var_3241_axis_0, split_sizes = tile_39, x = var_3240_cast_fp16)[name = tensor<string, []>("op_3241_cast_fp16")];
+            tensor<int32, [4]> var_3258_perm_0 = const()[name = tensor<string, []>("op_3258_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_40 = const()[name = tensor<string, []>("tile_40"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3259_axis_0 = const()[name = tensor<string, []>("op_3259_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3258_cast_fp16 = transpose(perm = var_3258_perm_0, x = k_27_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3259_cast_fp16_15 = split(axis = var_3259_axis_0, split_sizes = tile_40, x = var_3258_cast_fp16)[name = tensor<string, []>("op_3259_cast_fp16")];
+            tensor<int32, [16]> tile_41 = const()[name = tensor<string, []>("tile_41"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3276_axis_0 = const()[name = tensor<string, []>("op_3276_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3276_cast_fp16_15 = split(axis = var_3276_axis_0, split_sizes = tile_41, x = var_3238_cast_fp16)[name = tensor<string, []>("op_3276_cast_fp16")];
+            tensor<string, []> aw_417_equation_0 = const()[name = tensor<string, []>("aw_417_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_417_cast_fp16 = einsum(equation = aw_417_equation_0, values = (var_3259_cast_fp16_0, var_3241_cast_fp16_0))[name = tensor<string, []>("aw_417_cast_fp16")];
+            tensor<string, []> aw_419_equation_0 = const()[name = tensor<string, []>("aw_419_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_419_cast_fp16 = einsum(equation = aw_419_equation_0, values = (var_3259_cast_fp16_1, var_3241_cast_fp16_1))[name = tensor<string, []>("aw_419_cast_fp16")];
+            tensor<string, []> aw_421_equation_0 = const()[name = tensor<string, []>("aw_421_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_421_cast_fp16 = einsum(equation = aw_421_equation_0, values = (var_3259_cast_fp16_2, var_3241_cast_fp16_2))[name = tensor<string, []>("aw_421_cast_fp16")];
+            tensor<string, []> aw_423_equation_0 = const()[name = tensor<string, []>("aw_423_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_423_cast_fp16 = einsum(equation = aw_423_equation_0, values = (var_3259_cast_fp16_3, var_3241_cast_fp16_3))[name = tensor<string, []>("aw_423_cast_fp16")];
+            tensor<string, []> aw_425_equation_0 = const()[name = tensor<string, []>("aw_425_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_425_cast_fp16 = einsum(equation = aw_425_equation_0, values = (var_3259_cast_fp16_4, var_3241_cast_fp16_4))[name = tensor<string, []>("aw_425_cast_fp16")];
+            tensor<string, []> aw_427_equation_0 = const()[name = tensor<string, []>("aw_427_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_427_cast_fp16 = einsum(equation = aw_427_equation_0, values = (var_3259_cast_fp16_5, var_3241_cast_fp16_5))[name = tensor<string, []>("aw_427_cast_fp16")];
+            tensor<string, []> aw_429_equation_0 = const()[name = tensor<string, []>("aw_429_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_429_cast_fp16 = einsum(equation = aw_429_equation_0, values = (var_3259_cast_fp16_6, var_3241_cast_fp16_6))[name = tensor<string, []>("aw_429_cast_fp16")];
+            tensor<string, []> aw_431_equation_0 = const()[name = tensor<string, []>("aw_431_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_431_cast_fp16 = einsum(equation = aw_431_equation_0, values = (var_3259_cast_fp16_7, var_3241_cast_fp16_7))[name = tensor<string, []>("aw_431_cast_fp16")];
+            tensor<string, []> aw_433_equation_0 = const()[name = tensor<string, []>("aw_433_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_433_cast_fp16 = einsum(equation = aw_433_equation_0, values = (var_3259_cast_fp16_8, var_3241_cast_fp16_8))[name = tensor<string, []>("aw_433_cast_fp16")];
+            tensor<string, []> aw_435_equation_0 = const()[name = tensor<string, []>("aw_435_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_435_cast_fp16 = einsum(equation = aw_435_equation_0, values = (var_3259_cast_fp16_9, var_3241_cast_fp16_9))[name = tensor<string, []>("aw_435_cast_fp16")];
+            tensor<string, []> aw_437_equation_0 = const()[name = tensor<string, []>("aw_437_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_437_cast_fp16 = einsum(equation = aw_437_equation_0, values = (var_3259_cast_fp16_10, var_3241_cast_fp16_10))[name = tensor<string, []>("aw_437_cast_fp16")];
+            tensor<string, []> aw_439_equation_0 = const()[name = tensor<string, []>("aw_439_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_439_cast_fp16 = einsum(equation = aw_439_equation_0, values = (var_3259_cast_fp16_11, var_3241_cast_fp16_11))[name = tensor<string, []>("aw_439_cast_fp16")];
+            tensor<string, []> aw_441_equation_0 = const()[name = tensor<string, []>("aw_441_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_441_cast_fp16 = einsum(equation = aw_441_equation_0, values = (var_3259_cast_fp16_12, var_3241_cast_fp16_12))[name = tensor<string, []>("aw_441_cast_fp16")];
+            tensor<string, []> aw_443_equation_0 = const()[name = tensor<string, []>("aw_443_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_443_cast_fp16 = einsum(equation = aw_443_equation_0, values = (var_3259_cast_fp16_13, var_3241_cast_fp16_13))[name = tensor<string, []>("aw_443_cast_fp16")];
+            tensor<string, []> aw_445_equation_0 = const()[name = tensor<string, []>("aw_445_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_445_cast_fp16 = einsum(equation = aw_445_equation_0, values = (var_3259_cast_fp16_14, var_3241_cast_fp16_14))[name = tensor<string, []>("aw_445_cast_fp16")];
+            tensor<string, []> aw_447_equation_0 = const()[name = tensor<string, []>("aw_447_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_447_cast_fp16 = einsum(equation = aw_447_equation_0, values = (var_3259_cast_fp16_15, var_3241_cast_fp16_15))[name = tensor<string, []>("aw_447_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3325_cast_fp16 = softmax(axis = var_3189, x = aw_417_cast_fp16)[name = tensor<string, []>("op_3325_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3326_cast_fp16 = softmax(axis = var_3189, x = aw_419_cast_fp16)[name = tensor<string, []>("op_3326_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3327_cast_fp16 = softmax(axis = var_3189, x = aw_421_cast_fp16)[name = tensor<string, []>("op_3327_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3328_cast_fp16 = softmax(axis = var_3189, x = aw_423_cast_fp16)[name = tensor<string, []>("op_3328_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3329_cast_fp16 = softmax(axis = var_3189, x = aw_425_cast_fp16)[name = tensor<string, []>("op_3329_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3330_cast_fp16 = softmax(axis = var_3189, x = aw_427_cast_fp16)[name = tensor<string, []>("op_3330_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3331_cast_fp16 = softmax(axis = var_3189, x = aw_429_cast_fp16)[name = tensor<string, []>("op_3331_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3332_cast_fp16 = softmax(axis = var_3189, x = aw_431_cast_fp16)[name = tensor<string, []>("op_3332_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3333_cast_fp16 = softmax(axis = var_3189, x = aw_433_cast_fp16)[name = tensor<string, []>("op_3333_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3334_cast_fp16 = softmax(axis = var_3189, x = aw_435_cast_fp16)[name = tensor<string, []>("op_3334_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3335_cast_fp16 = softmax(axis = var_3189, x = aw_437_cast_fp16)[name = tensor<string, []>("op_3335_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3336_cast_fp16 = softmax(axis = var_3189, x = aw_439_cast_fp16)[name = tensor<string, []>("op_3336_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3337_cast_fp16 = softmax(axis = var_3189, x = aw_441_cast_fp16)[name = tensor<string, []>("op_3337_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3338_cast_fp16 = softmax(axis = var_3189, x = aw_443_cast_fp16)[name = tensor<string, []>("op_3338_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3339_cast_fp16 = softmax(axis = var_3189, x = aw_445_cast_fp16)[name = tensor<string, []>("op_3339_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3340_cast_fp16 = softmax(axis = var_3189, x = aw_447_cast_fp16)[name = tensor<string, []>("op_3340_cast_fp16")];
+            tensor<string, []> var_3342_equation_0 = const()[name = tensor<string, []>("op_3342_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3342_cast_fp16 = einsum(equation = var_3342_equation_0, values = (var_3276_cast_fp16_0, var_3325_cast_fp16))[name = tensor<string, []>("op_3342_cast_fp16")];
+            tensor<string, []> var_3344_equation_0 = const()[name = tensor<string, []>("op_3344_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3344_cast_fp16 = einsum(equation = var_3344_equation_0, values = (var_3276_cast_fp16_1, var_3326_cast_fp16))[name = tensor<string, []>("op_3344_cast_fp16")];
+            tensor<string, []> var_3346_equation_0 = const()[name = tensor<string, []>("op_3346_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3346_cast_fp16 = einsum(equation = var_3346_equation_0, values = (var_3276_cast_fp16_2, var_3327_cast_fp16))[name = tensor<string, []>("op_3346_cast_fp16")];
+            tensor<string, []> var_3348_equation_0 = const()[name = tensor<string, []>("op_3348_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3348_cast_fp16 = einsum(equation = var_3348_equation_0, values = (var_3276_cast_fp16_3, var_3328_cast_fp16))[name = tensor<string, []>("op_3348_cast_fp16")];
+            tensor<string, []> var_3350_equation_0 = const()[name = tensor<string, []>("op_3350_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3350_cast_fp16 = einsum(equation = var_3350_equation_0, values = (var_3276_cast_fp16_4, var_3329_cast_fp16))[name = tensor<string, []>("op_3350_cast_fp16")];
+            tensor<string, []> var_3352_equation_0 = const()[name = tensor<string, []>("op_3352_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3352_cast_fp16 = einsum(equation = var_3352_equation_0, values = (var_3276_cast_fp16_5, var_3330_cast_fp16))[name = tensor<string, []>("op_3352_cast_fp16")];
+            tensor<string, []> var_3354_equation_0 = const()[name = tensor<string, []>("op_3354_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3354_cast_fp16 = einsum(equation = var_3354_equation_0, values = (var_3276_cast_fp16_6, var_3331_cast_fp16))[name = tensor<string, []>("op_3354_cast_fp16")];
+            tensor<string, []> var_3356_equation_0 = const()[name = tensor<string, []>("op_3356_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3356_cast_fp16 = einsum(equation = var_3356_equation_0, values = (var_3276_cast_fp16_7, var_3332_cast_fp16))[name = tensor<string, []>("op_3356_cast_fp16")];
+            tensor<string, []> var_3358_equation_0 = const()[name = tensor<string, []>("op_3358_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3358_cast_fp16 = einsum(equation = var_3358_equation_0, values = (var_3276_cast_fp16_8, var_3333_cast_fp16))[name = tensor<string, []>("op_3358_cast_fp16")];
+            tensor<string, []> var_3360_equation_0 = const()[name = tensor<string, []>("op_3360_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3360_cast_fp16 = einsum(equation = var_3360_equation_0, values = (var_3276_cast_fp16_9, var_3334_cast_fp16))[name = tensor<string, []>("op_3360_cast_fp16")];
+            tensor<string, []> var_3362_equation_0 = const()[name = tensor<string, []>("op_3362_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3362_cast_fp16 = einsum(equation = var_3362_equation_0, values = (var_3276_cast_fp16_10, var_3335_cast_fp16))[name = tensor<string, []>("op_3362_cast_fp16")];
+            tensor<string, []> var_3364_equation_0 = const()[name = tensor<string, []>("op_3364_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3364_cast_fp16 = einsum(equation = var_3364_equation_0, values = (var_3276_cast_fp16_11, var_3336_cast_fp16))[name = tensor<string, []>("op_3364_cast_fp16")];
+            tensor<string, []> var_3366_equation_0 = const()[name = tensor<string, []>("op_3366_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3366_cast_fp16 = einsum(equation = var_3366_equation_0, values = (var_3276_cast_fp16_12, var_3337_cast_fp16))[name = tensor<string, []>("op_3366_cast_fp16")];
+            tensor<string, []> var_3368_equation_0 = const()[name = tensor<string, []>("op_3368_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3368_cast_fp16 = einsum(equation = var_3368_equation_0, values = (var_3276_cast_fp16_13, var_3338_cast_fp16))[name = tensor<string, []>("op_3368_cast_fp16")];
+            tensor<string, []> var_3370_equation_0 = const()[name = tensor<string, []>("op_3370_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3370_cast_fp16 = einsum(equation = var_3370_equation_0, values = (var_3276_cast_fp16_14, var_3339_cast_fp16))[name = tensor<string, []>("op_3370_cast_fp16")];
+            tensor<string, []> var_3372_equation_0 = const()[name = tensor<string, []>("op_3372_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3372_cast_fp16 = einsum(equation = var_3372_equation_0, values = (var_3276_cast_fp16_15, var_3340_cast_fp16))[name = tensor<string, []>("op_3372_cast_fp16")];
+            tensor<bool, []> input_135_interleave_0 = const()[name = tensor<string, []>("input_135_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_135_cast_fp16 = concat(axis = var_3189, interleave = input_135_interleave_0, values = (var_3342_cast_fp16, var_3344_cast_fp16, var_3346_cast_fp16, var_3348_cast_fp16, var_3350_cast_fp16, var_3352_cast_fp16, var_3354_cast_fp16, var_3356_cast_fp16, var_3358_cast_fp16, var_3360_cast_fp16, var_3362_cast_fp16, var_3364_cast_fp16, var_3366_cast_fp16, var_3368_cast_fp16, var_3370_cast_fp16, var_3372_cast_fp16))[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<string, []> var_3381_pad_type_0 = const()[name = tensor<string, []>("op_3381_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3381_strides_0 = const()[name = tensor<string, []>("op_3381_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3381_pad_0 = const()[name = tensor<string, []>("op_3381_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3381_dilations_0 = const()[name = tensor<string, []>("op_3381_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3381_groups_0 = const()[name = tensor<string, []>("op_3381_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_13_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(343647232)))];
+            tensor<fp16, [1024]> blocks_13_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345744448)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3381_cast_fp16 = conv(bias = blocks_13_attn_out_bias_to_fp16, dilations = var_3381_dilations_0, groups = var_3381_groups_0, pad = var_3381_pad_0, pad_type = var_3381_pad_type_0, strides = var_3381_strides_0, weight = blocks_13_attn_out_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("op_3381_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = var_3381_cast_fp16)[name = tensor<string, []>("inputs_55_cast_fp16")];
+            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_137_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_137_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345746560)))];
+            tensor<fp16, [1024]> input_137_beta_0_to_fp16 = const()[name = tensor<string, []>("input_137_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345748672)))];
+            tensor<fp16, []> var_3391_to_fp16 = const()[name = tensor<string, []>("op_3391_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_3391_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<string, []> input_139_pad_type_0 = const()[name = tensor<string, []>("input_139_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_139_strides_0 = const()[name = tensor<string, []>("input_139_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_139_pad_0 = const()[name = tensor<string, []>("input_139_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_139_dilations_0 = const()[name = tensor<string, []>("input_139_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_139_groups_0 = const()[name = tensor<string, []>("input_139_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_13_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(345750784)))];
+            tensor<fp16, [4096]> blocks_13_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(354139456)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_139_cast_fp16 = conv(bias = blocks_13_mlp_0_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = blocks_13_mlp_0_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<string, []> input_141_mode_0 = const()[name = tensor<string, []>("input_141_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_141_cast_fp16 = gelu(mode = input_141_mode_0, x = input_139_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<string, []> var_3417_pad_type_0 = const()[name = tensor<string, []>("op_3417_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3417_strides_0 = const()[name = tensor<string, []>("op_3417_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3417_pad_0 = const()[name = tensor<string, []>("op_3417_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3417_dilations_0 = const()[name = tensor<string, []>("op_3417_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3417_groups_0 = const()[name = tensor<string, []>("op_3417_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_13_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(354147712)))];
+            tensor<fp16, [1024]> blocks_13_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_13_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362536384)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3417_cast_fp16 = conv(bias = blocks_13_mlp_2_bias_to_fp16, dilations = var_3417_dilations_0, groups = var_3417_groups_0, pad = var_3417_pad_0, pad_type = var_3417_pad_type_0, strides = var_3417_strides_0, weight = blocks_13_mlp_2_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("op_3417_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = var_3417_cast_fp16)[name = tensor<string, []>("inputs_57_cast_fp16")];
+            tensor<int32, []> var_3426 = const()[name = tensor<string, []>("op_3426"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_143_axes_0 = const()[name = tensor<string, []>("input_143_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_143_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_143_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362538496)))];
+            tensor<fp16, [1024]> input_143_beta_0_to_fp16 = const()[name = tensor<string, []>("input_143_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362540608)))];
+            tensor<fp16, []> var_3442_to_fp16 = const()[name = tensor<string, []>("op_3442_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_143_cast_fp16 = layer_norm(axes = input_143_axes_0, beta = input_143_beta_0_to_fp16, epsilon = var_3442_to_fp16, gamma = input_143_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<string, []> q_29_pad_type_0 = const()[name = tensor<string, []>("q_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_29_strides_0 = const()[name = tensor<string, []>("q_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_29_pad_0 = const()[name = tensor<string, []>("q_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_29_dilations_0 = const()[name = tensor<string, []>("q_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_29_groups_0 = const()[name = tensor<string, []>("q_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3477_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3477_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(362542720)))];
+            tensor<fp16, [1024]> var_3477_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3477_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(364639936)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3477_cast_fp16 = conv(bias = var_3477_bias_0_to_fp16, dilations = q_29_dilations_0, groups = q_29_groups_0, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = q_29_strides_0, weight = var_3477_weight_0_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3477_cast_fp16")];
+            tensor<string, []> k_29_pad_type_0 = const()[name = tensor<string, []>("k_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_29_strides_0 = const()[name = tensor<string, []>("k_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_29_pad_0 = const()[name = tensor<string, []>("k_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_29_dilations_0 = const()[name = tensor<string, []>("k_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_29_groups_0 = const()[name = tensor<string, []>("k_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(364642048)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_29_cast_fp16 = conv(dilations = k_29_dilations_0, groups = k_29_groups_0, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = k_29_strides_0, weight = blocks_14_attn_key_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("k_29_cast_fp16")];
+            tensor<string, []> var_3475_pad_type_0 = const()[name = tensor<string, []>("op_3475_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3475_strides_0 = const()[name = tensor<string, []>("op_3475_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3475_pad_0 = const()[name = tensor<string, []>("op_3475_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3475_dilations_0 = const()[name = tensor<string, []>("op_3475_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3475_groups_0 = const()[name = tensor<string, []>("op_3475_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(366739264)))];
+            tensor<fp16, [1024]> blocks_14_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368836480)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3475_cast_fp16 = conv(bias = blocks_14_attn_value_bias_to_fp16, dilations = var_3475_dilations_0, groups = var_3475_groups_0, pad = var_3475_pad_0, pad_type = var_3475_pad_type_0, strides = var_3475_strides_0, weight = blocks_14_attn_value_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("op_3475_cast_fp16")];
+            tensor<int32, [16]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3478_axis_0 = const()[name = tensor<string, []>("op_3478_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3478_cast_fp16_15 = split(axis = var_3478_axis_0, split_sizes = tile_42, x = var_3477_cast_fp16)[name = tensor<string, []>("op_3478_cast_fp16")];
+            tensor<int32, [4]> var_3495_perm_0 = const()[name = tensor<string, []>("op_3495_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3496_axis_0 = const()[name = tensor<string, []>("op_3496_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3495_cast_fp16 = transpose(perm = var_3495_perm_0, x = k_29_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3496_cast_fp16_15 = split(axis = var_3496_axis_0, split_sizes = tile_43, x = var_3495_cast_fp16)[name = tensor<string, []>("op_3496_cast_fp16")];
+            tensor<int32, [16]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3513_axis_0 = const()[name = tensor<string, []>("op_3513_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3513_cast_fp16_15 = split(axis = var_3513_axis_0, split_sizes = tile_44, x = var_3475_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
+            tensor<string, []> aw_449_equation_0 = const()[name = tensor<string, []>("aw_449_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_449_cast_fp16 = einsum(equation = aw_449_equation_0, values = (var_3496_cast_fp16_0, var_3478_cast_fp16_0))[name = tensor<string, []>("aw_449_cast_fp16")];
+            tensor<string, []> aw_451_equation_0 = const()[name = tensor<string, []>("aw_451_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_451_cast_fp16 = einsum(equation = aw_451_equation_0, values = (var_3496_cast_fp16_1, var_3478_cast_fp16_1))[name = tensor<string, []>("aw_451_cast_fp16")];
+            tensor<string, []> aw_453_equation_0 = const()[name = tensor<string, []>("aw_453_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_453_cast_fp16 = einsum(equation = aw_453_equation_0, values = (var_3496_cast_fp16_2, var_3478_cast_fp16_2))[name = tensor<string, []>("aw_453_cast_fp16")];
+            tensor<string, []> aw_455_equation_0 = const()[name = tensor<string, []>("aw_455_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_455_cast_fp16 = einsum(equation = aw_455_equation_0, values = (var_3496_cast_fp16_3, var_3478_cast_fp16_3))[name = tensor<string, []>("aw_455_cast_fp16")];
+            tensor<string, []> aw_457_equation_0 = const()[name = tensor<string, []>("aw_457_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_457_cast_fp16 = einsum(equation = aw_457_equation_0, values = (var_3496_cast_fp16_4, var_3478_cast_fp16_4))[name = tensor<string, []>("aw_457_cast_fp16")];
+            tensor<string, []> aw_459_equation_0 = const()[name = tensor<string, []>("aw_459_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_459_cast_fp16 = einsum(equation = aw_459_equation_0, values = (var_3496_cast_fp16_5, var_3478_cast_fp16_5))[name = tensor<string, []>("aw_459_cast_fp16")];
+            tensor<string, []> aw_461_equation_0 = const()[name = tensor<string, []>("aw_461_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_461_cast_fp16 = einsum(equation = aw_461_equation_0, values = (var_3496_cast_fp16_6, var_3478_cast_fp16_6))[name = tensor<string, []>("aw_461_cast_fp16")];
+            tensor<string, []> aw_463_equation_0 = const()[name = tensor<string, []>("aw_463_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_463_cast_fp16 = einsum(equation = aw_463_equation_0, values = (var_3496_cast_fp16_7, var_3478_cast_fp16_7))[name = tensor<string, []>("aw_463_cast_fp16")];
+            tensor<string, []> aw_465_equation_0 = const()[name = tensor<string, []>("aw_465_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_465_cast_fp16 = einsum(equation = aw_465_equation_0, values = (var_3496_cast_fp16_8, var_3478_cast_fp16_8))[name = tensor<string, []>("aw_465_cast_fp16")];
+            tensor<string, []> aw_467_equation_0 = const()[name = tensor<string, []>("aw_467_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_467_cast_fp16 = einsum(equation = aw_467_equation_0, values = (var_3496_cast_fp16_9, var_3478_cast_fp16_9))[name = tensor<string, []>("aw_467_cast_fp16")];
+            tensor<string, []> aw_469_equation_0 = const()[name = tensor<string, []>("aw_469_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_469_cast_fp16 = einsum(equation = aw_469_equation_0, values = (var_3496_cast_fp16_10, var_3478_cast_fp16_10))[name = tensor<string, []>("aw_469_cast_fp16")];
+            tensor<string, []> aw_471_equation_0 = const()[name = tensor<string, []>("aw_471_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_471_cast_fp16 = einsum(equation = aw_471_equation_0, values = (var_3496_cast_fp16_11, var_3478_cast_fp16_11))[name = tensor<string, []>("aw_471_cast_fp16")];
+            tensor<string, []> aw_473_equation_0 = const()[name = tensor<string, []>("aw_473_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_473_cast_fp16 = einsum(equation = aw_473_equation_0, values = (var_3496_cast_fp16_12, var_3478_cast_fp16_12))[name = tensor<string, []>("aw_473_cast_fp16")];
+            tensor<string, []> aw_475_equation_0 = const()[name = tensor<string, []>("aw_475_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_475_cast_fp16 = einsum(equation = aw_475_equation_0, values = (var_3496_cast_fp16_13, var_3478_cast_fp16_13))[name = tensor<string, []>("aw_475_cast_fp16")];
+            tensor<string, []> aw_477_equation_0 = const()[name = tensor<string, []>("aw_477_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_477_cast_fp16 = einsum(equation = aw_477_equation_0, values = (var_3496_cast_fp16_14, var_3478_cast_fp16_14))[name = tensor<string, []>("aw_477_cast_fp16")];
+            tensor<string, []> aw_479_equation_0 = const()[name = tensor<string, []>("aw_479_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_479_cast_fp16 = einsum(equation = aw_479_equation_0, values = (var_3496_cast_fp16_15, var_3478_cast_fp16_15))[name = tensor<string, []>("aw_479_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3562_cast_fp16 = softmax(axis = var_3426, x = aw_449_cast_fp16)[name = tensor<string, []>("op_3562_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3563_cast_fp16 = softmax(axis = var_3426, x = aw_451_cast_fp16)[name = tensor<string, []>("op_3563_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3564_cast_fp16 = softmax(axis = var_3426, x = aw_453_cast_fp16)[name = tensor<string, []>("op_3564_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3565_cast_fp16 = softmax(axis = var_3426, x = aw_455_cast_fp16)[name = tensor<string, []>("op_3565_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3566_cast_fp16 = softmax(axis = var_3426, x = aw_457_cast_fp16)[name = tensor<string, []>("op_3566_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3567_cast_fp16 = softmax(axis = var_3426, x = aw_459_cast_fp16)[name = tensor<string, []>("op_3567_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3568_cast_fp16 = softmax(axis = var_3426, x = aw_461_cast_fp16)[name = tensor<string, []>("op_3568_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3569_cast_fp16 = softmax(axis = var_3426, x = aw_463_cast_fp16)[name = tensor<string, []>("op_3569_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3570_cast_fp16 = softmax(axis = var_3426, x = aw_465_cast_fp16)[name = tensor<string, []>("op_3570_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3571_cast_fp16 = softmax(axis = var_3426, x = aw_467_cast_fp16)[name = tensor<string, []>("op_3571_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3572_cast_fp16 = softmax(axis = var_3426, x = aw_469_cast_fp16)[name = tensor<string, []>("op_3572_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3573_cast_fp16 = softmax(axis = var_3426, x = aw_471_cast_fp16)[name = tensor<string, []>("op_3573_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3574_cast_fp16 = softmax(axis = var_3426, x = aw_473_cast_fp16)[name = tensor<string, []>("op_3574_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3575_cast_fp16 = softmax(axis = var_3426, x = aw_475_cast_fp16)[name = tensor<string, []>("op_3575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3576_cast_fp16 = softmax(axis = var_3426, x = aw_477_cast_fp16)[name = tensor<string, []>("op_3576_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3577_cast_fp16 = softmax(axis = var_3426, x = aw_479_cast_fp16)[name = tensor<string, []>("op_3577_cast_fp16")];
+            tensor<string, []> var_3579_equation_0 = const()[name = tensor<string, []>("op_3579_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3579_cast_fp16 = einsum(equation = var_3579_equation_0, values = (var_3513_cast_fp16_0, var_3562_cast_fp16))[name = tensor<string, []>("op_3579_cast_fp16")];
+            tensor<string, []> var_3581_equation_0 = const()[name = tensor<string, []>("op_3581_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3581_cast_fp16 = einsum(equation = var_3581_equation_0, values = (var_3513_cast_fp16_1, var_3563_cast_fp16))[name = tensor<string, []>("op_3581_cast_fp16")];
+            tensor<string, []> var_3583_equation_0 = const()[name = tensor<string, []>("op_3583_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3583_cast_fp16 = einsum(equation = var_3583_equation_0, values = (var_3513_cast_fp16_2, var_3564_cast_fp16))[name = tensor<string, []>("op_3583_cast_fp16")];
+            tensor<string, []> var_3585_equation_0 = const()[name = tensor<string, []>("op_3585_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3585_cast_fp16 = einsum(equation = var_3585_equation_0, values = (var_3513_cast_fp16_3, var_3565_cast_fp16))[name = tensor<string, []>("op_3585_cast_fp16")];
+            tensor<string, []> var_3587_equation_0 = const()[name = tensor<string, []>("op_3587_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3587_cast_fp16 = einsum(equation = var_3587_equation_0, values = (var_3513_cast_fp16_4, var_3566_cast_fp16))[name = tensor<string, []>("op_3587_cast_fp16")];
+            tensor<string, []> var_3589_equation_0 = const()[name = tensor<string, []>("op_3589_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3589_cast_fp16 = einsum(equation = var_3589_equation_0, values = (var_3513_cast_fp16_5, var_3567_cast_fp16))[name = tensor<string, []>("op_3589_cast_fp16")];
+            tensor<string, []> var_3591_equation_0 = const()[name = tensor<string, []>("op_3591_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3591_cast_fp16 = einsum(equation = var_3591_equation_0, values = (var_3513_cast_fp16_6, var_3568_cast_fp16))[name = tensor<string, []>("op_3591_cast_fp16")];
+            tensor<string, []> var_3593_equation_0 = const()[name = tensor<string, []>("op_3593_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3593_cast_fp16 = einsum(equation = var_3593_equation_0, values = (var_3513_cast_fp16_7, var_3569_cast_fp16))[name = tensor<string, []>("op_3593_cast_fp16")];
+            tensor<string, []> var_3595_equation_0 = const()[name = tensor<string, []>("op_3595_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3595_cast_fp16 = einsum(equation = var_3595_equation_0, values = (var_3513_cast_fp16_8, var_3570_cast_fp16))[name = tensor<string, []>("op_3595_cast_fp16")];
+            tensor<string, []> var_3597_equation_0 = const()[name = tensor<string, []>("op_3597_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3597_cast_fp16 = einsum(equation = var_3597_equation_0, values = (var_3513_cast_fp16_9, var_3571_cast_fp16))[name = tensor<string, []>("op_3597_cast_fp16")];
+            tensor<string, []> var_3599_equation_0 = const()[name = tensor<string, []>("op_3599_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3599_cast_fp16 = einsum(equation = var_3599_equation_0, values = (var_3513_cast_fp16_10, var_3572_cast_fp16))[name = tensor<string, []>("op_3599_cast_fp16")];
+            tensor<string, []> var_3601_equation_0 = const()[name = tensor<string, []>("op_3601_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3601_cast_fp16 = einsum(equation = var_3601_equation_0, values = (var_3513_cast_fp16_11, var_3573_cast_fp16))[name = tensor<string, []>("op_3601_cast_fp16")];
+            tensor<string, []> var_3603_equation_0 = const()[name = tensor<string, []>("op_3603_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3603_cast_fp16 = einsum(equation = var_3603_equation_0, values = (var_3513_cast_fp16_12, var_3574_cast_fp16))[name = tensor<string, []>("op_3603_cast_fp16")];
+            tensor<string, []> var_3605_equation_0 = const()[name = tensor<string, []>("op_3605_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3605_cast_fp16 = einsum(equation = var_3605_equation_0, values = (var_3513_cast_fp16_13, var_3575_cast_fp16))[name = tensor<string, []>("op_3605_cast_fp16")];
+            tensor<string, []> var_3607_equation_0 = const()[name = tensor<string, []>("op_3607_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3607_cast_fp16 = einsum(equation = var_3607_equation_0, values = (var_3513_cast_fp16_14, var_3576_cast_fp16))[name = tensor<string, []>("op_3607_cast_fp16")];
+            tensor<string, []> var_3609_equation_0 = const()[name = tensor<string, []>("op_3609_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3609_cast_fp16 = einsum(equation = var_3609_equation_0, values = (var_3513_cast_fp16_15, var_3577_cast_fp16))[name = tensor<string, []>("op_3609_cast_fp16")];
+            tensor<bool, []> input_145_interleave_0 = const()[name = tensor<string, []>("input_145_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_145_cast_fp16 = concat(axis = var_3426, interleave = input_145_interleave_0, values = (var_3579_cast_fp16, var_3581_cast_fp16, var_3583_cast_fp16, var_3585_cast_fp16, var_3587_cast_fp16, var_3589_cast_fp16, var_3591_cast_fp16, var_3593_cast_fp16, var_3595_cast_fp16, var_3597_cast_fp16, var_3599_cast_fp16, var_3601_cast_fp16, var_3603_cast_fp16, var_3605_cast_fp16, var_3607_cast_fp16, var_3609_cast_fp16))[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<string, []> var_3618_pad_type_0 = const()[name = tensor<string, []>("op_3618_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3618_strides_0 = const()[name = tensor<string, []>("op_3618_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3618_pad_0 = const()[name = tensor<string, []>("op_3618_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3618_dilations_0 = const()[name = tensor<string, []>("op_3618_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3618_groups_0 = const()[name = tensor<string, []>("op_3618_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_14_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368838592)))];
+            tensor<fp16, [1024]> blocks_14_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370935808)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3618_cast_fp16 = conv(bias = blocks_14_attn_out_bias_to_fp16, dilations = var_3618_dilations_0, groups = var_3618_groups_0, pad = var_3618_pad_0, pad_type = var_3618_pad_type_0, strides = var_3618_strides_0, weight = blocks_14_attn_out_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("op_3618_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = var_3618_cast_fp16)[name = tensor<string, []>("inputs_59_cast_fp16")];
+            tensor<int32, [1]> input_147_axes_0 = const()[name = tensor<string, []>("input_147_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_147_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370937920)))];
+            tensor<fp16, [1024]> input_147_beta_0_to_fp16 = const()[name = tensor<string, []>("input_147_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370940032)))];
+            tensor<fp16, []> var_3628_to_fp16 = const()[name = tensor<string, []>("op_3628_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_147_cast_fp16 = layer_norm(axes = input_147_axes_0, beta = input_147_beta_0_to_fp16, epsilon = var_3628_to_fp16, gamma = input_147_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<string, []> input_149_pad_type_0 = const()[name = tensor<string, []>("input_149_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_149_strides_0 = const()[name = tensor<string, []>("input_149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_149_pad_0 = const()[name = tensor<string, []>("input_149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_149_dilations_0 = const()[name = tensor<string, []>("input_149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_149_groups_0 = const()[name = tensor<string, []>("input_149_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_14_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370942144)))];
+            tensor<fp16, [4096]> blocks_14_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(379330816)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_149_cast_fp16 = conv(bias = blocks_14_mlp_0_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = blocks_14_mlp_0_weight_to_fp16, x = input_147_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<string, []> input_151_mode_0 = const()[name = tensor<string, []>("input_151_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<string, []> var_3654_pad_type_0 = const()[name = tensor<string, []>("op_3654_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3654_strides_0 = const()[name = tensor<string, []>("op_3654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3654_pad_0 = const()[name = tensor<string, []>("op_3654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3654_dilations_0 = const()[name = tensor<string, []>("op_3654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3654_groups_0 = const()[name = tensor<string, []>("op_3654_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_14_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(379339072)))];
+            tensor<fp16, [1024]> blocks_14_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_14_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387727744)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3654_cast_fp16 = conv(bias = blocks_14_mlp_2_bias_to_fp16, dilations = var_3654_dilations_0, groups = var_3654_groups_0, pad = var_3654_pad_0, pad_type = var_3654_pad_type_0, strides = var_3654_strides_0, weight = blocks_14_mlp_2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor<string, []>("op_3654_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = var_3654_cast_fp16)[name = tensor<string, []>("inputs_61_cast_fp16")];
+            tensor<int32, []> var_3663 = const()[name = tensor<string, []>("op_3663"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_153_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_153_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387729856)))];
+            tensor<fp16, [1024]> input_153_beta_0_to_fp16 = const()[name = tensor<string, []>("input_153_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387731968)))];
+            tensor<fp16, []> var_3679_to_fp16 = const()[name = tensor<string, []>("op_3679_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, beta = input_153_beta_0_to_fp16, epsilon = var_3679_to_fp16, gamma = input_153_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<string, []> q_31_pad_type_0 = const()[name = tensor<string, []>("q_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_31_strides_0 = const()[name = tensor<string, []>("q_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_31_pad_0 = const()[name = tensor<string, []>("q_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_31_dilations_0 = const()[name = tensor<string, []>("q_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_31_groups_0 = const()[name = tensor<string, []>("q_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3714_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3714_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(387734080)))];
+            tensor<fp16, [1024]> var_3714_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3714_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(389831296)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3714_cast_fp16 = conv(bias = var_3714_bias_0_to_fp16, dilations = q_31_dilations_0, groups = q_31_groups_0, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = q_31_strides_0, weight = var_3714_weight_0_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_3714_cast_fp16")];
+            tensor<string, []> k_31_pad_type_0 = const()[name = tensor<string, []>("k_31_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_31_strides_0 = const()[name = tensor<string, []>("k_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_31_pad_0 = const()[name = tensor<string, []>("k_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_31_dilations_0 = const()[name = tensor<string, []>("k_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_31_groups_0 = const()[name = tensor<string, []>("k_31_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(389833408)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_31_cast_fp16 = conv(dilations = k_31_dilations_0, groups = k_31_groups_0, pad = k_31_pad_0, pad_type = k_31_pad_type_0, strides = k_31_strides_0, weight = blocks_15_attn_key_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("k_31_cast_fp16")];
+            tensor<string, []> var_3712_pad_type_0 = const()[name = tensor<string, []>("op_3712_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3712_strides_0 = const()[name = tensor<string, []>("op_3712_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3712_pad_0 = const()[name = tensor<string, []>("op_3712_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3712_dilations_0 = const()[name = tensor<string, []>("op_3712_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3712_groups_0 = const()[name = tensor<string, []>("op_3712_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(391930624)))];
+            tensor<fp16, [1024]> blocks_15_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394027840)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3712_cast_fp16 = conv(bias = blocks_15_attn_value_bias_to_fp16, dilations = var_3712_dilations_0, groups = var_3712_groups_0, pad = var_3712_pad_0, pad_type = var_3712_pad_type_0, strides = var_3712_strides_0, weight = blocks_15_attn_value_weight_to_fp16, x = input_153_cast_fp16)[name = tensor<string, []>("op_3712_cast_fp16")];
+            tensor<int32, [16]> tile_45 = const()[name = tensor<string, []>("tile_45"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3715_axis_0 = const()[name = tensor<string, []>("op_3715_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3715_cast_fp16_15 = split(axis = var_3715_axis_0, split_sizes = tile_45, x = var_3714_cast_fp16)[name = tensor<string, []>("op_3715_cast_fp16")];
+            tensor<int32, [4]> var_3732_perm_0 = const()[name = tensor<string, []>("op_3732_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_46 = const()[name = tensor<string, []>("tile_46"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3733_axis_0 = const()[name = tensor<string, []>("op_3733_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3732_cast_fp16 = transpose(perm = var_3732_perm_0, x = k_31_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3733_cast_fp16_15 = split(axis = var_3733_axis_0, split_sizes = tile_46, x = var_3732_cast_fp16)[name = tensor<string, []>("op_3733_cast_fp16")];
+            tensor<int32, [16]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3750_axis_0 = const()[name = tensor<string, []>("op_3750_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3750_cast_fp16_15 = split(axis = var_3750_axis_0, split_sizes = tile_47, x = var_3712_cast_fp16)[name = tensor<string, []>("op_3750_cast_fp16")];
+            tensor<string, []> aw_481_equation_0 = const()[name = tensor<string, []>("aw_481_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_481_cast_fp16 = einsum(equation = aw_481_equation_0, values = (var_3733_cast_fp16_0, var_3715_cast_fp16_0))[name = tensor<string, []>("aw_481_cast_fp16")];
+            tensor<string, []> aw_483_equation_0 = const()[name = tensor<string, []>("aw_483_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_483_cast_fp16 = einsum(equation = aw_483_equation_0, values = (var_3733_cast_fp16_1, var_3715_cast_fp16_1))[name = tensor<string, []>("aw_483_cast_fp16")];
+            tensor<string, []> aw_485_equation_0 = const()[name = tensor<string, []>("aw_485_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_485_cast_fp16 = einsum(equation = aw_485_equation_0, values = (var_3733_cast_fp16_2, var_3715_cast_fp16_2))[name = tensor<string, []>("aw_485_cast_fp16")];
+            tensor<string, []> aw_487_equation_0 = const()[name = tensor<string, []>("aw_487_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_487_cast_fp16 = einsum(equation = aw_487_equation_0, values = (var_3733_cast_fp16_3, var_3715_cast_fp16_3))[name = tensor<string, []>("aw_487_cast_fp16")];
+            tensor<string, []> aw_489_equation_0 = const()[name = tensor<string, []>("aw_489_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_489_cast_fp16 = einsum(equation = aw_489_equation_0, values = (var_3733_cast_fp16_4, var_3715_cast_fp16_4))[name = tensor<string, []>("aw_489_cast_fp16")];
+            tensor<string, []> aw_491_equation_0 = const()[name = tensor<string, []>("aw_491_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_491_cast_fp16 = einsum(equation = aw_491_equation_0, values = (var_3733_cast_fp16_5, var_3715_cast_fp16_5))[name = tensor<string, []>("aw_491_cast_fp16")];
+            tensor<string, []> aw_493_equation_0 = const()[name = tensor<string, []>("aw_493_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_493_cast_fp16 = einsum(equation = aw_493_equation_0, values = (var_3733_cast_fp16_6, var_3715_cast_fp16_6))[name = tensor<string, []>("aw_493_cast_fp16")];
+            tensor<string, []> aw_495_equation_0 = const()[name = tensor<string, []>("aw_495_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_495_cast_fp16 = einsum(equation = aw_495_equation_0, values = (var_3733_cast_fp16_7, var_3715_cast_fp16_7))[name = tensor<string, []>("aw_495_cast_fp16")];
+            tensor<string, []> aw_497_equation_0 = const()[name = tensor<string, []>("aw_497_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_497_cast_fp16 = einsum(equation = aw_497_equation_0, values = (var_3733_cast_fp16_8, var_3715_cast_fp16_8))[name = tensor<string, []>("aw_497_cast_fp16")];
+            tensor<string, []> aw_499_equation_0 = const()[name = tensor<string, []>("aw_499_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_499_cast_fp16 = einsum(equation = aw_499_equation_0, values = (var_3733_cast_fp16_9, var_3715_cast_fp16_9))[name = tensor<string, []>("aw_499_cast_fp16")];
+            tensor<string, []> aw_501_equation_0 = const()[name = tensor<string, []>("aw_501_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_501_cast_fp16 = einsum(equation = aw_501_equation_0, values = (var_3733_cast_fp16_10, var_3715_cast_fp16_10))[name = tensor<string, []>("aw_501_cast_fp16")];
+            tensor<string, []> aw_503_equation_0 = const()[name = tensor<string, []>("aw_503_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_503_cast_fp16 = einsum(equation = aw_503_equation_0, values = (var_3733_cast_fp16_11, var_3715_cast_fp16_11))[name = tensor<string, []>("aw_503_cast_fp16")];
+            tensor<string, []> aw_505_equation_0 = const()[name = tensor<string, []>("aw_505_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_505_cast_fp16 = einsum(equation = aw_505_equation_0, values = (var_3733_cast_fp16_12, var_3715_cast_fp16_12))[name = tensor<string, []>("aw_505_cast_fp16")];
+            tensor<string, []> aw_507_equation_0 = const()[name = tensor<string, []>("aw_507_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_507_cast_fp16 = einsum(equation = aw_507_equation_0, values = (var_3733_cast_fp16_13, var_3715_cast_fp16_13))[name = tensor<string, []>("aw_507_cast_fp16")];
+            tensor<string, []> aw_509_equation_0 = const()[name = tensor<string, []>("aw_509_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_509_cast_fp16 = einsum(equation = aw_509_equation_0, values = (var_3733_cast_fp16_14, var_3715_cast_fp16_14))[name = tensor<string, []>("aw_509_cast_fp16")];
+            tensor<string, []> aw_511_equation_0 = const()[name = tensor<string, []>("aw_511_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_511_cast_fp16 = einsum(equation = aw_511_equation_0, values = (var_3733_cast_fp16_15, var_3715_cast_fp16_15))[name = tensor<string, []>("aw_511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3799_cast_fp16 = softmax(axis = var_3663, x = aw_481_cast_fp16)[name = tensor<string, []>("op_3799_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3800_cast_fp16 = softmax(axis = var_3663, x = aw_483_cast_fp16)[name = tensor<string, []>("op_3800_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3801_cast_fp16 = softmax(axis = var_3663, x = aw_485_cast_fp16)[name = tensor<string, []>("op_3801_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3802_cast_fp16 = softmax(axis = var_3663, x = aw_487_cast_fp16)[name = tensor<string, []>("op_3802_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3803_cast_fp16 = softmax(axis = var_3663, x = aw_489_cast_fp16)[name = tensor<string, []>("op_3803_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3804_cast_fp16 = softmax(axis = var_3663, x = aw_491_cast_fp16)[name = tensor<string, []>("op_3804_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3805_cast_fp16 = softmax(axis = var_3663, x = aw_493_cast_fp16)[name = tensor<string, []>("op_3805_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3806_cast_fp16 = softmax(axis = var_3663, x = aw_495_cast_fp16)[name = tensor<string, []>("op_3806_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3807_cast_fp16 = softmax(axis = var_3663, x = aw_497_cast_fp16)[name = tensor<string, []>("op_3807_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3808_cast_fp16 = softmax(axis = var_3663, x = aw_499_cast_fp16)[name = tensor<string, []>("op_3808_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3809_cast_fp16 = softmax(axis = var_3663, x = aw_501_cast_fp16)[name = tensor<string, []>("op_3809_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3810_cast_fp16 = softmax(axis = var_3663, x = aw_503_cast_fp16)[name = tensor<string, []>("op_3810_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3811_cast_fp16 = softmax(axis = var_3663, x = aw_505_cast_fp16)[name = tensor<string, []>("op_3811_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3812_cast_fp16 = softmax(axis = var_3663, x = aw_507_cast_fp16)[name = tensor<string, []>("op_3812_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3813_cast_fp16 = softmax(axis = var_3663, x = aw_509_cast_fp16)[name = tensor<string, []>("op_3813_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_3814_cast_fp16 = softmax(axis = var_3663, x = aw_511_cast_fp16)[name = tensor<string, []>("op_3814_cast_fp16")];
+            tensor<string, []> var_3816_equation_0 = const()[name = tensor<string, []>("op_3816_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3816_cast_fp16 = einsum(equation = var_3816_equation_0, values = (var_3750_cast_fp16_0, var_3799_cast_fp16))[name = tensor<string, []>("op_3816_cast_fp16")];
+            tensor<string, []> var_3818_equation_0 = const()[name = tensor<string, []>("op_3818_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3818_cast_fp16 = einsum(equation = var_3818_equation_0, values = (var_3750_cast_fp16_1, var_3800_cast_fp16))[name = tensor<string, []>("op_3818_cast_fp16")];
+            tensor<string, []> var_3820_equation_0 = const()[name = tensor<string, []>("op_3820_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3820_cast_fp16 = einsum(equation = var_3820_equation_0, values = (var_3750_cast_fp16_2, var_3801_cast_fp16))[name = tensor<string, []>("op_3820_cast_fp16")];
+            tensor<string, []> var_3822_equation_0 = const()[name = tensor<string, []>("op_3822_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3822_cast_fp16 = einsum(equation = var_3822_equation_0, values = (var_3750_cast_fp16_3, var_3802_cast_fp16))[name = tensor<string, []>("op_3822_cast_fp16")];
+            tensor<string, []> var_3824_equation_0 = const()[name = tensor<string, []>("op_3824_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3824_cast_fp16 = einsum(equation = var_3824_equation_0, values = (var_3750_cast_fp16_4, var_3803_cast_fp16))[name = tensor<string, []>("op_3824_cast_fp16")];
+            tensor<string, []> var_3826_equation_0 = const()[name = tensor<string, []>("op_3826_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3826_cast_fp16 = einsum(equation = var_3826_equation_0, values = (var_3750_cast_fp16_5, var_3804_cast_fp16))[name = tensor<string, []>("op_3826_cast_fp16")];
+            tensor<string, []> var_3828_equation_0 = const()[name = tensor<string, []>("op_3828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3828_cast_fp16 = einsum(equation = var_3828_equation_0, values = (var_3750_cast_fp16_6, var_3805_cast_fp16))[name = tensor<string, []>("op_3828_cast_fp16")];
+            tensor<string, []> var_3830_equation_0 = const()[name = tensor<string, []>("op_3830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3830_cast_fp16 = einsum(equation = var_3830_equation_0, values = (var_3750_cast_fp16_7, var_3806_cast_fp16))[name = tensor<string, []>("op_3830_cast_fp16")];
+            tensor<string, []> var_3832_equation_0 = const()[name = tensor<string, []>("op_3832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3832_cast_fp16 = einsum(equation = var_3832_equation_0, values = (var_3750_cast_fp16_8, var_3807_cast_fp16))[name = tensor<string, []>("op_3832_cast_fp16")];
+            tensor<string, []> var_3834_equation_0 = const()[name = tensor<string, []>("op_3834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3834_cast_fp16 = einsum(equation = var_3834_equation_0, values = (var_3750_cast_fp16_9, var_3808_cast_fp16))[name = tensor<string, []>("op_3834_cast_fp16")];
+            tensor<string, []> var_3836_equation_0 = const()[name = tensor<string, []>("op_3836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3836_cast_fp16 = einsum(equation = var_3836_equation_0, values = (var_3750_cast_fp16_10, var_3809_cast_fp16))[name = tensor<string, []>("op_3836_cast_fp16")];
+            tensor<string, []> var_3838_equation_0 = const()[name = tensor<string, []>("op_3838_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3838_cast_fp16 = einsum(equation = var_3838_equation_0, values = (var_3750_cast_fp16_11, var_3810_cast_fp16))[name = tensor<string, []>("op_3838_cast_fp16")];
+            tensor<string, []> var_3840_equation_0 = const()[name = tensor<string, []>("op_3840_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3840_cast_fp16 = einsum(equation = var_3840_equation_0, values = (var_3750_cast_fp16_12, var_3811_cast_fp16))[name = tensor<string, []>("op_3840_cast_fp16")];
+            tensor<string, []> var_3842_equation_0 = const()[name = tensor<string, []>("op_3842_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3842_cast_fp16 = einsum(equation = var_3842_equation_0, values = (var_3750_cast_fp16_13, var_3812_cast_fp16))[name = tensor<string, []>("op_3842_cast_fp16")];
+            tensor<string, []> var_3844_equation_0 = const()[name = tensor<string, []>("op_3844_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3844_cast_fp16 = einsum(equation = var_3844_equation_0, values = (var_3750_cast_fp16_14, var_3813_cast_fp16))[name = tensor<string, []>("op_3844_cast_fp16")];
+            tensor<string, []> var_3846_equation_0 = const()[name = tensor<string, []>("op_3846_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_3846_cast_fp16 = einsum(equation = var_3846_equation_0, values = (var_3750_cast_fp16_15, var_3814_cast_fp16))[name = tensor<string, []>("op_3846_cast_fp16")];
+            tensor<bool, []> input_155_interleave_0 = const()[name = tensor<string, []>("input_155_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_155_cast_fp16 = concat(axis = var_3663, interleave = input_155_interleave_0, values = (var_3816_cast_fp16, var_3818_cast_fp16, var_3820_cast_fp16, var_3822_cast_fp16, var_3824_cast_fp16, var_3826_cast_fp16, var_3828_cast_fp16, var_3830_cast_fp16, var_3832_cast_fp16, var_3834_cast_fp16, var_3836_cast_fp16, var_3838_cast_fp16, var_3840_cast_fp16, var_3842_cast_fp16, var_3844_cast_fp16, var_3846_cast_fp16))[name = tensor<string, []>("input_155_cast_fp16")];
+            tensor<string, []> var_3855_pad_type_0 = const()[name = tensor<string, []>("op_3855_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3855_strides_0 = const()[name = tensor<string, []>("op_3855_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3855_pad_0 = const()[name = tensor<string, []>("op_3855_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3855_dilations_0 = const()[name = tensor<string, []>("op_3855_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3855_groups_0 = const()[name = tensor<string, []>("op_3855_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_15_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(394029952)))];
+            tensor<fp16, [1024]> blocks_15_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396127168)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3855_cast_fp16 = conv(bias = blocks_15_attn_out_bias_to_fp16, dilations = var_3855_dilations_0, groups = var_3855_groups_0, pad = var_3855_pad_0, pad_type = var_3855_pad_type_0, strides = var_3855_strides_0, weight = blocks_15_attn_out_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("op_3855_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = var_3855_cast_fp16)[name = tensor<string, []>("inputs_63_cast_fp16")];
+            tensor<int32, [1]> input_157_axes_0 = const()[name = tensor<string, []>("input_157_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_157_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_157_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396129280)))];
+            tensor<fp16, [1024]> input_157_beta_0_to_fp16 = const()[name = tensor<string, []>("input_157_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396131392)))];
+            tensor<fp16, []> var_3865_to_fp16 = const()[name = tensor<string, []>("op_3865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_157_cast_fp16 = layer_norm(axes = input_157_axes_0, beta = input_157_beta_0_to_fp16, epsilon = var_3865_to_fp16, gamma = input_157_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
+            tensor<string, []> input_159_pad_type_0 = const()[name = tensor<string, []>("input_159_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_159_strides_0 = const()[name = tensor<string, []>("input_159_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_159_pad_0 = const()[name = tensor<string, []>("input_159_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_159_dilations_0 = const()[name = tensor<string, []>("input_159_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_159_groups_0 = const()[name = tensor<string, []>("input_159_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_15_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(396133504)))];
+            tensor<fp16, [4096]> blocks_15_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(404522176)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_159_cast_fp16 = conv(bias = blocks_15_mlp_0_bias_to_fp16, dilations = input_159_dilations_0, groups = input_159_groups_0, pad = input_159_pad_0, pad_type = input_159_pad_type_0, strides = input_159_strides_0, weight = blocks_15_mlp_0_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
+            tensor<string, []> input_161_mode_0 = const()[name = tensor<string, []>("input_161_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_161_cast_fp16 = gelu(mode = input_161_mode_0, x = input_159_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
+            tensor<string, []> var_3891_pad_type_0 = const()[name = tensor<string, []>("op_3891_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3891_strides_0 = const()[name = tensor<string, []>("op_3891_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3891_pad_0 = const()[name = tensor<string, []>("op_3891_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3891_dilations_0 = const()[name = tensor<string, []>("op_3891_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3891_groups_0 = const()[name = tensor<string, []>("op_3891_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_15_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(404530432)))];
+            tensor<fp16, [1024]> blocks_15_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_15_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412919104)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3891_cast_fp16 = conv(bias = blocks_15_mlp_2_bias_to_fp16, dilations = var_3891_dilations_0, groups = var_3891_groups_0, pad = var_3891_pad_0, pad_type = var_3891_pad_type_0, strides = var_3891_strides_0, weight = blocks_15_mlp_2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("op_3891_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = var_3891_cast_fp16)[name = tensor<string, []>("inputs_65_cast_fp16")];
+            tensor<int32, []> var_3900 = const()[name = tensor<string, []>("op_3900"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_163_axes_0 = const()[name = tensor<string, []>("input_163_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_163_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412921216)))];
+            tensor<fp16, [1024]> input_163_beta_0_to_fp16 = const()[name = tensor<string, []>("input_163_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412923328)))];
+            tensor<fp16, []> var_3916_to_fp16 = const()[name = tensor<string, []>("op_3916_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_3916_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<string, []> q_33_pad_type_0 = const()[name = tensor<string, []>("q_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_33_strides_0 = const()[name = tensor<string, []>("q_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_33_pad_0 = const()[name = tensor<string, []>("q_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_33_dilations_0 = const()[name = tensor<string, []>("q_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_33_groups_0 = const()[name = tensor<string, []>("q_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_3951_weight_0_to_fp16 = const()[name = tensor<string, []>("op_3951_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(412925440)))];
+            tensor<fp16, [1024]> var_3951_bias_0_to_fp16 = const()[name = tensor<string, []>("op_3951_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(415022656)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3951_cast_fp16 = conv(bias = var_3951_bias_0_to_fp16, dilations = q_33_dilations_0, groups = q_33_groups_0, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = q_33_strides_0, weight = var_3951_weight_0_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_3951_cast_fp16")];
+            tensor<string, []> k_33_pad_type_0 = const()[name = tensor<string, []>("k_33_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_33_strides_0 = const()[name = tensor<string, []>("k_33_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_33_pad_0 = const()[name = tensor<string, []>("k_33_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_33_dilations_0 = const()[name = tensor<string, []>("k_33_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_33_groups_0 = const()[name = tensor<string, []>("k_33_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(415024768)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_33_cast_fp16 = conv(dilations = k_33_dilations_0, groups = k_33_groups_0, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = k_33_strides_0, weight = blocks_16_attn_key_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("k_33_cast_fp16")];
+            tensor<string, []> var_3949_pad_type_0 = const()[name = tensor<string, []>("op_3949_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_3949_strides_0 = const()[name = tensor<string, []>("op_3949_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3949_pad_0 = const()[name = tensor<string, []>("op_3949_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3949_dilations_0 = const()[name = tensor<string, []>("op_3949_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_3949_groups_0 = const()[name = tensor<string, []>("op_3949_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(417121984)))];
+            tensor<fp16, [1024]> blocks_16_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(419219200)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_3949_cast_fp16 = conv(bias = blocks_16_attn_value_bias_to_fp16, dilations = var_3949_dilations_0, groups = var_3949_groups_0, pad = var_3949_pad_0, pad_type = var_3949_pad_type_0, strides = var_3949_strides_0, weight = blocks_16_attn_value_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("op_3949_cast_fp16")];
+            tensor<int32, [16]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3952_axis_0 = const()[name = tensor<string, []>("op_3952_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3952_cast_fp16_15 = split(axis = var_3952_axis_0, split_sizes = tile_48, x = var_3951_cast_fp16)[name = tensor<string, []>("op_3952_cast_fp16")];
+            tensor<int32, [4]> var_3969_perm_0 = const()[name = tensor<string, []>("op_3969_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3970_axis_0 = const()[name = tensor<string, []>("op_3970_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_3969_cast_fp16 = transpose(perm = var_3969_perm_0, x = k_33_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_3970_cast_fp16_15 = split(axis = var_3970_axis_0, split_sizes = tile_49, x = var_3969_cast_fp16)[name = tensor<string, []>("op_3970_cast_fp16")];
+            tensor<int32, [16]> tile_50 = const()[name = tensor<string, []>("tile_50"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_3987_axis_0 = const()[name = tensor<string, []>("op_3987_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_3987_cast_fp16_15 = split(axis = var_3987_axis_0, split_sizes = tile_50, x = var_3949_cast_fp16)[name = tensor<string, []>("op_3987_cast_fp16")];
+            tensor<string, []> aw_513_equation_0 = const()[name = tensor<string, []>("aw_513_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_513_cast_fp16 = einsum(equation = aw_513_equation_0, values = (var_3970_cast_fp16_0, var_3952_cast_fp16_0))[name = tensor<string, []>("aw_513_cast_fp16")];
+            tensor<string, []> aw_515_equation_0 = const()[name = tensor<string, []>("aw_515_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_515_cast_fp16 = einsum(equation = aw_515_equation_0, values = (var_3970_cast_fp16_1, var_3952_cast_fp16_1))[name = tensor<string, []>("aw_515_cast_fp16")];
+            tensor<string, []> aw_517_equation_0 = const()[name = tensor<string, []>("aw_517_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_517_cast_fp16 = einsum(equation = aw_517_equation_0, values = (var_3970_cast_fp16_2, var_3952_cast_fp16_2))[name = tensor<string, []>("aw_517_cast_fp16")];
+            tensor<string, []> aw_519_equation_0 = const()[name = tensor<string, []>("aw_519_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_519_cast_fp16 = einsum(equation = aw_519_equation_0, values = (var_3970_cast_fp16_3, var_3952_cast_fp16_3))[name = tensor<string, []>("aw_519_cast_fp16")];
+            tensor<string, []> aw_521_equation_0 = const()[name = tensor<string, []>("aw_521_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_521_cast_fp16 = einsum(equation = aw_521_equation_0, values = (var_3970_cast_fp16_4, var_3952_cast_fp16_4))[name = tensor<string, []>("aw_521_cast_fp16")];
+            tensor<string, []> aw_523_equation_0 = const()[name = tensor<string, []>("aw_523_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_523_cast_fp16 = einsum(equation = aw_523_equation_0, values = (var_3970_cast_fp16_5, var_3952_cast_fp16_5))[name = tensor<string, []>("aw_523_cast_fp16")];
+            tensor<string, []> aw_525_equation_0 = const()[name = tensor<string, []>("aw_525_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_525_cast_fp16 = einsum(equation = aw_525_equation_0, values = (var_3970_cast_fp16_6, var_3952_cast_fp16_6))[name = tensor<string, []>("aw_525_cast_fp16")];
+            tensor<string, []> aw_527_equation_0 = const()[name = tensor<string, []>("aw_527_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_527_cast_fp16 = einsum(equation = aw_527_equation_0, values = (var_3970_cast_fp16_7, var_3952_cast_fp16_7))[name = tensor<string, []>("aw_527_cast_fp16")];
+            tensor<string, []> aw_529_equation_0 = const()[name = tensor<string, []>("aw_529_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_529_cast_fp16 = einsum(equation = aw_529_equation_0, values = (var_3970_cast_fp16_8, var_3952_cast_fp16_8))[name = tensor<string, []>("aw_529_cast_fp16")];
+            tensor<string, []> aw_531_equation_0 = const()[name = tensor<string, []>("aw_531_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_531_cast_fp16 = einsum(equation = aw_531_equation_0, values = (var_3970_cast_fp16_9, var_3952_cast_fp16_9))[name = tensor<string, []>("aw_531_cast_fp16")];
+            tensor<string, []> aw_533_equation_0 = const()[name = tensor<string, []>("aw_533_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_533_cast_fp16 = einsum(equation = aw_533_equation_0, values = (var_3970_cast_fp16_10, var_3952_cast_fp16_10))[name = tensor<string, []>("aw_533_cast_fp16")];
+            tensor<string, []> aw_535_equation_0 = const()[name = tensor<string, []>("aw_535_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_535_cast_fp16 = einsum(equation = aw_535_equation_0, values = (var_3970_cast_fp16_11, var_3952_cast_fp16_11))[name = tensor<string, []>("aw_535_cast_fp16")];
+            tensor<string, []> aw_537_equation_0 = const()[name = tensor<string, []>("aw_537_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_537_cast_fp16 = einsum(equation = aw_537_equation_0, values = (var_3970_cast_fp16_12, var_3952_cast_fp16_12))[name = tensor<string, []>("aw_537_cast_fp16")];
+            tensor<string, []> aw_539_equation_0 = const()[name = tensor<string, []>("aw_539_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_539_cast_fp16 = einsum(equation = aw_539_equation_0, values = (var_3970_cast_fp16_13, var_3952_cast_fp16_13))[name = tensor<string, []>("aw_539_cast_fp16")];
+            tensor<string, []> aw_541_equation_0 = const()[name = tensor<string, []>("aw_541_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_541_cast_fp16 = einsum(equation = aw_541_equation_0, values = (var_3970_cast_fp16_14, var_3952_cast_fp16_14))[name = tensor<string, []>("aw_541_cast_fp16")];
+            tensor<string, []> aw_543_equation_0 = const()[name = tensor<string, []>("aw_543_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_543_cast_fp16 = einsum(equation = aw_543_equation_0, values = (var_3970_cast_fp16_15, var_3952_cast_fp16_15))[name = tensor<string, []>("aw_543_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4036_cast_fp16 = softmax(axis = var_3900, x = aw_513_cast_fp16)[name = tensor<string, []>("op_4036_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4037_cast_fp16 = softmax(axis = var_3900, x = aw_515_cast_fp16)[name = tensor<string, []>("op_4037_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4038_cast_fp16 = softmax(axis = var_3900, x = aw_517_cast_fp16)[name = tensor<string, []>("op_4038_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4039_cast_fp16 = softmax(axis = var_3900, x = aw_519_cast_fp16)[name = tensor<string, []>("op_4039_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4040_cast_fp16 = softmax(axis = var_3900, x = aw_521_cast_fp16)[name = tensor<string, []>("op_4040_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4041_cast_fp16 = softmax(axis = var_3900, x = aw_523_cast_fp16)[name = tensor<string, []>("op_4041_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4042_cast_fp16 = softmax(axis = var_3900, x = aw_525_cast_fp16)[name = tensor<string, []>("op_4042_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4043_cast_fp16 = softmax(axis = var_3900, x = aw_527_cast_fp16)[name = tensor<string, []>("op_4043_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4044_cast_fp16 = softmax(axis = var_3900, x = aw_529_cast_fp16)[name = tensor<string, []>("op_4044_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4045_cast_fp16 = softmax(axis = var_3900, x = aw_531_cast_fp16)[name = tensor<string, []>("op_4045_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4046_cast_fp16 = softmax(axis = var_3900, x = aw_533_cast_fp16)[name = tensor<string, []>("op_4046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4047_cast_fp16 = softmax(axis = var_3900, x = aw_535_cast_fp16)[name = tensor<string, []>("op_4047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4048_cast_fp16 = softmax(axis = var_3900, x = aw_537_cast_fp16)[name = tensor<string, []>("op_4048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4049_cast_fp16 = softmax(axis = var_3900, x = aw_539_cast_fp16)[name = tensor<string, []>("op_4049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4050_cast_fp16 = softmax(axis = var_3900, x = aw_541_cast_fp16)[name = tensor<string, []>("op_4050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4051_cast_fp16 = softmax(axis = var_3900, x = aw_543_cast_fp16)[name = tensor<string, []>("op_4051_cast_fp16")];
+            tensor<string, []> var_4053_equation_0 = const()[name = tensor<string, []>("op_4053_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4053_cast_fp16 = einsum(equation = var_4053_equation_0, values = (var_3987_cast_fp16_0, var_4036_cast_fp16))[name = tensor<string, []>("op_4053_cast_fp16")];
+            tensor<string, []> var_4055_equation_0 = const()[name = tensor<string, []>("op_4055_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4055_cast_fp16 = einsum(equation = var_4055_equation_0, values = (var_3987_cast_fp16_1, var_4037_cast_fp16))[name = tensor<string, []>("op_4055_cast_fp16")];
+            tensor<string, []> var_4057_equation_0 = const()[name = tensor<string, []>("op_4057_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4057_cast_fp16 = einsum(equation = var_4057_equation_0, values = (var_3987_cast_fp16_2, var_4038_cast_fp16))[name = tensor<string, []>("op_4057_cast_fp16")];
+            tensor<string, []> var_4059_equation_0 = const()[name = tensor<string, []>("op_4059_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4059_cast_fp16 = einsum(equation = var_4059_equation_0, values = (var_3987_cast_fp16_3, var_4039_cast_fp16))[name = tensor<string, []>("op_4059_cast_fp16")];
+            tensor<string, []> var_4061_equation_0 = const()[name = tensor<string, []>("op_4061_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4061_cast_fp16 = einsum(equation = var_4061_equation_0, values = (var_3987_cast_fp16_4, var_4040_cast_fp16))[name = tensor<string, []>("op_4061_cast_fp16")];
+            tensor<string, []> var_4063_equation_0 = const()[name = tensor<string, []>("op_4063_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4063_cast_fp16 = einsum(equation = var_4063_equation_0, values = (var_3987_cast_fp16_5, var_4041_cast_fp16))[name = tensor<string, []>("op_4063_cast_fp16")];
+            tensor<string, []> var_4065_equation_0 = const()[name = tensor<string, []>("op_4065_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4065_cast_fp16 = einsum(equation = var_4065_equation_0, values = (var_3987_cast_fp16_6, var_4042_cast_fp16))[name = tensor<string, []>("op_4065_cast_fp16")];
+            tensor<string, []> var_4067_equation_0 = const()[name = tensor<string, []>("op_4067_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3987_cast_fp16_7, var_4043_cast_fp16))[name = tensor<string, []>("op_4067_cast_fp16")];
+            tensor<string, []> var_4069_equation_0 = const()[name = tensor<string, []>("op_4069_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4069_cast_fp16 = einsum(equation = var_4069_equation_0, values = (var_3987_cast_fp16_8, var_4044_cast_fp16))[name = tensor<string, []>("op_4069_cast_fp16")];
+            tensor<string, []> var_4071_equation_0 = const()[name = tensor<string, []>("op_4071_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_3987_cast_fp16_9, var_4045_cast_fp16))[name = tensor<string, []>("op_4071_cast_fp16")];
+            tensor<string, []> var_4073_equation_0 = const()[name = tensor<string, []>("op_4073_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4073_cast_fp16 = einsum(equation = var_4073_equation_0, values = (var_3987_cast_fp16_10, var_4046_cast_fp16))[name = tensor<string, []>("op_4073_cast_fp16")];
+            tensor<string, []> var_4075_equation_0 = const()[name = tensor<string, []>("op_4075_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_3987_cast_fp16_11, var_4047_cast_fp16))[name = tensor<string, []>("op_4075_cast_fp16")];
+            tensor<string, []> var_4077_equation_0 = const()[name = tensor<string, []>("op_4077_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4077_cast_fp16 = einsum(equation = var_4077_equation_0, values = (var_3987_cast_fp16_12, var_4048_cast_fp16))[name = tensor<string, []>("op_4077_cast_fp16")];
+            tensor<string, []> var_4079_equation_0 = const()[name = tensor<string, []>("op_4079_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_3987_cast_fp16_13, var_4049_cast_fp16))[name = tensor<string, []>("op_4079_cast_fp16")];
+            tensor<string, []> var_4081_equation_0 = const()[name = tensor<string, []>("op_4081_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4081_cast_fp16 = einsum(equation = var_4081_equation_0, values = (var_3987_cast_fp16_14, var_4050_cast_fp16))[name = tensor<string, []>("op_4081_cast_fp16")];
+            tensor<string, []> var_4083_equation_0 = const()[name = tensor<string, []>("op_4083_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_3987_cast_fp16_15, var_4051_cast_fp16))[name = tensor<string, []>("op_4083_cast_fp16")];
+            tensor<bool, []> input_165_interleave_0 = const()[name = tensor<string, []>("input_165_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_165_cast_fp16 = concat(axis = var_3900, interleave = input_165_interleave_0, values = (var_4053_cast_fp16, var_4055_cast_fp16, var_4057_cast_fp16, var_4059_cast_fp16, var_4061_cast_fp16, var_4063_cast_fp16, var_4065_cast_fp16, var_4067_cast_fp16, var_4069_cast_fp16, var_4071_cast_fp16, var_4073_cast_fp16, var_4075_cast_fp16, var_4077_cast_fp16, var_4079_cast_fp16, var_4081_cast_fp16, var_4083_cast_fp16))[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<string, []> var_4092_pad_type_0 = const()[name = tensor<string, []>("op_4092_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4092_strides_0 = const()[name = tensor<string, []>("op_4092_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4092_pad_0 = const()[name = tensor<string, []>("op_4092_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4092_dilations_0 = const()[name = tensor<string, []>("op_4092_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4092_groups_0 = const()[name = tensor<string, []>("op_4092_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_16_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(419221312)))];
+            tensor<fp16, [1024]> blocks_16_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421318528)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4092_cast_fp16 = conv(bias = blocks_16_attn_out_bias_to_fp16, dilations = var_4092_dilations_0, groups = var_4092_groups_0, pad = var_4092_pad_0, pad_type = var_4092_pad_type_0, strides = var_4092_strides_0, weight = blocks_16_attn_out_weight_to_fp16, x = input_165_cast_fp16)[name = tensor<string, []>("op_4092_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = var_4092_cast_fp16)[name = tensor<string, []>("inputs_67_cast_fp16")];
+            tensor<int32, [1]> input_167_axes_0 = const()[name = tensor<string, []>("input_167_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_167_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_167_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421320640)))];
+            tensor<fp16, [1024]> input_167_beta_0_to_fp16 = const()[name = tensor<string, []>("input_167_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421322752)))];
+            tensor<fp16, []> var_4102_to_fp16 = const()[name = tensor<string, []>("op_4102_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_167_cast_fp16 = layer_norm(axes = input_167_axes_0, beta = input_167_beta_0_to_fp16, epsilon = var_4102_to_fp16, gamma = input_167_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<string, []> input_169_pad_type_0 = const()[name = tensor<string, []>("input_169_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_169_strides_0 = const()[name = tensor<string, []>("input_169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_169_pad_0 = const()[name = tensor<string, []>("input_169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_169_dilations_0 = const()[name = tensor<string, []>("input_169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_169_groups_0 = const()[name = tensor<string, []>("input_169_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_16_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(421324864)))];
+            tensor<fp16, [4096]> blocks_16_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(429713536)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_169_cast_fp16 = conv(bias = blocks_16_mlp_0_bias_to_fp16, dilations = input_169_dilations_0, groups = input_169_groups_0, pad = input_169_pad_0, pad_type = input_169_pad_type_0, strides = input_169_strides_0, weight = blocks_16_mlp_0_weight_to_fp16, x = input_167_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<string, []> input_171_mode_0 = const()[name = tensor<string, []>("input_171_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_171_cast_fp16 = gelu(mode = input_171_mode_0, x = input_169_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<string, []> var_4128_pad_type_0 = const()[name = tensor<string, []>("op_4128_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4128_strides_0 = const()[name = tensor<string, []>("op_4128_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4128_pad_0 = const()[name = tensor<string, []>("op_4128_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4128_dilations_0 = const()[name = tensor<string, []>("op_4128_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4128_groups_0 = const()[name = tensor<string, []>("op_4128_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_16_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(429721792)))];
+            tensor<fp16, [1024]> blocks_16_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_16_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438110464)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4128_cast_fp16 = conv(bias = blocks_16_mlp_2_bias_to_fp16, dilations = var_4128_dilations_0, groups = var_4128_groups_0, pad = var_4128_pad_0, pad_type = var_4128_pad_type_0, strides = var_4128_strides_0, weight = blocks_16_mlp_2_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("op_4128_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = var_4128_cast_fp16)[name = tensor<string, []>("inputs_69_cast_fp16")];
+            tensor<int32, []> var_4137 = const()[name = tensor<string, []>("op_4137"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_173_axes_0 = const()[name = tensor<string, []>("input_173_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_173_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_173_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438112576)))];
+            tensor<fp16, [1024]> input_173_beta_0_to_fp16 = const()[name = tensor<string, []>("input_173_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438114688)))];
+            tensor<fp16, []> var_4153_to_fp16 = const()[name = tensor<string, []>("op_4153_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_173_cast_fp16 = layer_norm(axes = input_173_axes_0, beta = input_173_beta_0_to_fp16, epsilon = var_4153_to_fp16, gamma = input_173_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<string, []> q_35_pad_type_0 = const()[name = tensor<string, []>("q_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_35_strides_0 = const()[name = tensor<string, []>("q_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_35_pad_0 = const()[name = tensor<string, []>("q_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_35_dilations_0 = const()[name = tensor<string, []>("q_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_35_groups_0 = const()[name = tensor<string, []>("q_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4188_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4188_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(438116800)))];
+            tensor<fp16, [1024]> var_4188_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4188_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(440214016)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4188_cast_fp16 = conv(bias = var_4188_bias_0_to_fp16, dilations = q_35_dilations_0, groups = q_35_groups_0, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = q_35_strides_0, weight = var_4188_weight_0_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4188_cast_fp16")];
+            tensor<string, []> k_35_pad_type_0 = const()[name = tensor<string, []>("k_35_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_35_strides_0 = const()[name = tensor<string, []>("k_35_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_35_pad_0 = const()[name = tensor<string, []>("k_35_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_35_dilations_0 = const()[name = tensor<string, []>("k_35_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_35_groups_0 = const()[name = tensor<string, []>("k_35_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(440216128)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_35_cast_fp16 = conv(dilations = k_35_dilations_0, groups = k_35_groups_0, pad = k_35_pad_0, pad_type = k_35_pad_type_0, strides = k_35_strides_0, weight = blocks_17_attn_key_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("k_35_cast_fp16")];
+            tensor<string, []> var_4186_pad_type_0 = const()[name = tensor<string, []>("op_4186_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4186_strides_0 = const()[name = tensor<string, []>("op_4186_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4186_pad_0 = const()[name = tensor<string, []>("op_4186_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4186_dilations_0 = const()[name = tensor<string, []>("op_4186_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4186_groups_0 = const()[name = tensor<string, []>("op_4186_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(442313344)))];
+            tensor<fp16, [1024]> blocks_17_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(444410560)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4186_cast_fp16 = conv(bias = blocks_17_attn_value_bias_to_fp16, dilations = var_4186_dilations_0, groups = var_4186_groups_0, pad = var_4186_pad_0, pad_type = var_4186_pad_type_0, strides = var_4186_strides_0, weight = blocks_17_attn_value_weight_to_fp16, x = input_173_cast_fp16)[name = tensor<string, []>("op_4186_cast_fp16")];
+            tensor<int32, [16]> tile_51 = const()[name = tensor<string, []>("tile_51"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4189_axis_0 = const()[name = tensor<string, []>("op_4189_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4189_cast_fp16_15 = split(axis = var_4189_axis_0, split_sizes = tile_51, x = var_4188_cast_fp16)[name = tensor<string, []>("op_4189_cast_fp16")];
+            tensor<int32, [4]> var_4206_perm_0 = const()[name = tensor<string, []>("op_4206_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4207_axis_0 = const()[name = tensor<string, []>("op_4207_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4206_cast_fp16 = transpose(perm = var_4206_perm_0, x = k_35_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4207_cast_fp16_15 = split(axis = var_4207_axis_0, split_sizes = tile_52, x = var_4206_cast_fp16)[name = tensor<string, []>("op_4207_cast_fp16")];
+            tensor<int32, [16]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4224_axis_0 = const()[name = tensor<string, []>("op_4224_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4224_cast_fp16_15 = split(axis = var_4224_axis_0, split_sizes = tile_53, x = var_4186_cast_fp16)[name = tensor<string, []>("op_4224_cast_fp16")];
+            tensor<string, []> aw_545_equation_0 = const()[name = tensor<string, []>("aw_545_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_545_cast_fp16 = einsum(equation = aw_545_equation_0, values = (var_4207_cast_fp16_0, var_4189_cast_fp16_0))[name = tensor<string, []>("aw_545_cast_fp16")];
+            tensor<string, []> aw_547_equation_0 = const()[name = tensor<string, []>("aw_547_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_547_cast_fp16 = einsum(equation = aw_547_equation_0, values = (var_4207_cast_fp16_1, var_4189_cast_fp16_1))[name = tensor<string, []>("aw_547_cast_fp16")];
+            tensor<string, []> aw_549_equation_0 = const()[name = tensor<string, []>("aw_549_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_549_cast_fp16 = einsum(equation = aw_549_equation_0, values = (var_4207_cast_fp16_2, var_4189_cast_fp16_2))[name = tensor<string, []>("aw_549_cast_fp16")];
+            tensor<string, []> aw_551_equation_0 = const()[name = tensor<string, []>("aw_551_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_551_cast_fp16 = einsum(equation = aw_551_equation_0, values = (var_4207_cast_fp16_3, var_4189_cast_fp16_3))[name = tensor<string, []>("aw_551_cast_fp16")];
+            tensor<string, []> aw_553_equation_0 = const()[name = tensor<string, []>("aw_553_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_553_cast_fp16 = einsum(equation = aw_553_equation_0, values = (var_4207_cast_fp16_4, var_4189_cast_fp16_4))[name = tensor<string, []>("aw_553_cast_fp16")];
+            tensor<string, []> aw_555_equation_0 = const()[name = tensor<string, []>("aw_555_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_555_cast_fp16 = einsum(equation = aw_555_equation_0, values = (var_4207_cast_fp16_5, var_4189_cast_fp16_5))[name = tensor<string, []>("aw_555_cast_fp16")];
+            tensor<string, []> aw_557_equation_0 = const()[name = tensor<string, []>("aw_557_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_557_cast_fp16 = einsum(equation = aw_557_equation_0, values = (var_4207_cast_fp16_6, var_4189_cast_fp16_6))[name = tensor<string, []>("aw_557_cast_fp16")];
+            tensor<string, []> aw_559_equation_0 = const()[name = tensor<string, []>("aw_559_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_559_cast_fp16 = einsum(equation = aw_559_equation_0, values = (var_4207_cast_fp16_7, var_4189_cast_fp16_7))[name = tensor<string, []>("aw_559_cast_fp16")];
+            tensor<string, []> aw_561_equation_0 = const()[name = tensor<string, []>("aw_561_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_561_cast_fp16 = einsum(equation = aw_561_equation_0, values = (var_4207_cast_fp16_8, var_4189_cast_fp16_8))[name = tensor<string, []>("aw_561_cast_fp16")];
+            tensor<string, []> aw_563_equation_0 = const()[name = tensor<string, []>("aw_563_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_563_cast_fp16 = einsum(equation = aw_563_equation_0, values = (var_4207_cast_fp16_9, var_4189_cast_fp16_9))[name = tensor<string, []>("aw_563_cast_fp16")];
+            tensor<string, []> aw_565_equation_0 = const()[name = tensor<string, []>("aw_565_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_565_cast_fp16 = einsum(equation = aw_565_equation_0, values = (var_4207_cast_fp16_10, var_4189_cast_fp16_10))[name = tensor<string, []>("aw_565_cast_fp16")];
+            tensor<string, []> aw_567_equation_0 = const()[name = tensor<string, []>("aw_567_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_567_cast_fp16 = einsum(equation = aw_567_equation_0, values = (var_4207_cast_fp16_11, var_4189_cast_fp16_11))[name = tensor<string, []>("aw_567_cast_fp16")];
+            tensor<string, []> aw_569_equation_0 = const()[name = tensor<string, []>("aw_569_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_569_cast_fp16 = einsum(equation = aw_569_equation_0, values = (var_4207_cast_fp16_12, var_4189_cast_fp16_12))[name = tensor<string, []>("aw_569_cast_fp16")];
+            tensor<string, []> aw_571_equation_0 = const()[name = tensor<string, []>("aw_571_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_571_cast_fp16 = einsum(equation = aw_571_equation_0, values = (var_4207_cast_fp16_13, var_4189_cast_fp16_13))[name = tensor<string, []>("aw_571_cast_fp16")];
+            tensor<string, []> aw_573_equation_0 = const()[name = tensor<string, []>("aw_573_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_573_cast_fp16 = einsum(equation = aw_573_equation_0, values = (var_4207_cast_fp16_14, var_4189_cast_fp16_14))[name = tensor<string, []>("aw_573_cast_fp16")];
+            tensor<string, []> aw_575_equation_0 = const()[name = tensor<string, []>("aw_575_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_575_cast_fp16 = einsum(equation = aw_575_equation_0, values = (var_4207_cast_fp16_15, var_4189_cast_fp16_15))[name = tensor<string, []>("aw_575_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4273_cast_fp16 = softmax(axis = var_4137, x = aw_545_cast_fp16)[name = tensor<string, []>("op_4273_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4274_cast_fp16 = softmax(axis = var_4137, x = aw_547_cast_fp16)[name = tensor<string, []>("op_4274_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4275_cast_fp16 = softmax(axis = var_4137, x = aw_549_cast_fp16)[name = tensor<string, []>("op_4275_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4276_cast_fp16 = softmax(axis = var_4137, x = aw_551_cast_fp16)[name = tensor<string, []>("op_4276_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4277_cast_fp16 = softmax(axis = var_4137, x = aw_553_cast_fp16)[name = tensor<string, []>("op_4277_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4278_cast_fp16 = softmax(axis = var_4137, x = aw_555_cast_fp16)[name = tensor<string, []>("op_4278_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4279_cast_fp16 = softmax(axis = var_4137, x = aw_557_cast_fp16)[name = tensor<string, []>("op_4279_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4280_cast_fp16 = softmax(axis = var_4137, x = aw_559_cast_fp16)[name = tensor<string, []>("op_4280_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4281_cast_fp16 = softmax(axis = var_4137, x = aw_561_cast_fp16)[name = tensor<string, []>("op_4281_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4282_cast_fp16 = softmax(axis = var_4137, x = aw_563_cast_fp16)[name = tensor<string, []>("op_4282_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4283_cast_fp16 = softmax(axis = var_4137, x = aw_565_cast_fp16)[name = tensor<string, []>("op_4283_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4284_cast_fp16 = softmax(axis = var_4137, x = aw_567_cast_fp16)[name = tensor<string, []>("op_4284_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4285_cast_fp16 = softmax(axis = var_4137, x = aw_569_cast_fp16)[name = tensor<string, []>("op_4285_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4286_cast_fp16 = softmax(axis = var_4137, x = aw_571_cast_fp16)[name = tensor<string, []>("op_4286_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4287_cast_fp16 = softmax(axis = var_4137, x = aw_573_cast_fp16)[name = tensor<string, []>("op_4287_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4288_cast_fp16 = softmax(axis = var_4137, x = aw_575_cast_fp16)[name = tensor<string, []>("op_4288_cast_fp16")];
+            tensor<string, []> var_4290_equation_0 = const()[name = tensor<string, []>("op_4290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4290_cast_fp16 = einsum(equation = var_4290_equation_0, values = (var_4224_cast_fp16_0, var_4273_cast_fp16))[name = tensor<string, []>("op_4290_cast_fp16")];
+            tensor<string, []> var_4292_equation_0 = const()[name = tensor<string, []>("op_4292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4292_cast_fp16 = einsum(equation = var_4292_equation_0, values = (var_4224_cast_fp16_1, var_4274_cast_fp16))[name = tensor<string, []>("op_4292_cast_fp16")];
+            tensor<string, []> var_4294_equation_0 = const()[name = tensor<string, []>("op_4294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4294_cast_fp16 = einsum(equation = var_4294_equation_0, values = (var_4224_cast_fp16_2, var_4275_cast_fp16))[name = tensor<string, []>("op_4294_cast_fp16")];
+            tensor<string, []> var_4296_equation_0 = const()[name = tensor<string, []>("op_4296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4296_cast_fp16 = einsum(equation = var_4296_equation_0, values = (var_4224_cast_fp16_3, var_4276_cast_fp16))[name = tensor<string, []>("op_4296_cast_fp16")];
+            tensor<string, []> var_4298_equation_0 = const()[name = tensor<string, []>("op_4298_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4298_cast_fp16 = einsum(equation = var_4298_equation_0, values = (var_4224_cast_fp16_4, var_4277_cast_fp16))[name = tensor<string, []>("op_4298_cast_fp16")];
+            tensor<string, []> var_4300_equation_0 = const()[name = tensor<string, []>("op_4300_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4300_cast_fp16 = einsum(equation = var_4300_equation_0, values = (var_4224_cast_fp16_5, var_4278_cast_fp16))[name = tensor<string, []>("op_4300_cast_fp16")];
+            tensor<string, []> var_4302_equation_0 = const()[name = tensor<string, []>("op_4302_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4302_cast_fp16 = einsum(equation = var_4302_equation_0, values = (var_4224_cast_fp16_6, var_4279_cast_fp16))[name = tensor<string, []>("op_4302_cast_fp16")];
+            tensor<string, []> var_4304_equation_0 = const()[name = tensor<string, []>("op_4304_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4304_cast_fp16 = einsum(equation = var_4304_equation_0, values = (var_4224_cast_fp16_7, var_4280_cast_fp16))[name = tensor<string, []>("op_4304_cast_fp16")];
+            tensor<string, []> var_4306_equation_0 = const()[name = tensor<string, []>("op_4306_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4306_cast_fp16 = einsum(equation = var_4306_equation_0, values = (var_4224_cast_fp16_8, var_4281_cast_fp16))[name = tensor<string, []>("op_4306_cast_fp16")];
+            tensor<string, []> var_4308_equation_0 = const()[name = tensor<string, []>("op_4308_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4308_cast_fp16 = einsum(equation = var_4308_equation_0, values = (var_4224_cast_fp16_9, var_4282_cast_fp16))[name = tensor<string, []>("op_4308_cast_fp16")];
+            tensor<string, []> var_4310_equation_0 = const()[name = tensor<string, []>("op_4310_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4310_cast_fp16 = einsum(equation = var_4310_equation_0, values = (var_4224_cast_fp16_10, var_4283_cast_fp16))[name = tensor<string, []>("op_4310_cast_fp16")];
+            tensor<string, []> var_4312_equation_0 = const()[name = tensor<string, []>("op_4312_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4312_cast_fp16 = einsum(equation = var_4312_equation_0, values = (var_4224_cast_fp16_11, var_4284_cast_fp16))[name = tensor<string, []>("op_4312_cast_fp16")];
+            tensor<string, []> var_4314_equation_0 = const()[name = tensor<string, []>("op_4314_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4314_cast_fp16 = einsum(equation = var_4314_equation_0, values = (var_4224_cast_fp16_12, var_4285_cast_fp16))[name = tensor<string, []>("op_4314_cast_fp16")];
+            tensor<string, []> var_4316_equation_0 = const()[name = tensor<string, []>("op_4316_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4316_cast_fp16 = einsum(equation = var_4316_equation_0, values = (var_4224_cast_fp16_13, var_4286_cast_fp16))[name = tensor<string, []>("op_4316_cast_fp16")];
+            tensor<string, []> var_4318_equation_0 = const()[name = tensor<string, []>("op_4318_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4318_cast_fp16 = einsum(equation = var_4318_equation_0, values = (var_4224_cast_fp16_14, var_4287_cast_fp16))[name = tensor<string, []>("op_4318_cast_fp16")];
+            tensor<string, []> var_4320_equation_0 = const()[name = tensor<string, []>("op_4320_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4320_cast_fp16 = einsum(equation = var_4320_equation_0, values = (var_4224_cast_fp16_15, var_4288_cast_fp16))[name = tensor<string, []>("op_4320_cast_fp16")];
+            tensor<bool, []> input_175_interleave_0 = const()[name = tensor<string, []>("input_175_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_175_cast_fp16 = concat(axis = var_4137, interleave = input_175_interleave_0, values = (var_4290_cast_fp16, var_4292_cast_fp16, var_4294_cast_fp16, var_4296_cast_fp16, var_4298_cast_fp16, var_4300_cast_fp16, var_4302_cast_fp16, var_4304_cast_fp16, var_4306_cast_fp16, var_4308_cast_fp16, var_4310_cast_fp16, var_4312_cast_fp16, var_4314_cast_fp16, var_4316_cast_fp16, var_4318_cast_fp16, var_4320_cast_fp16))[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<string, []> var_4329_pad_type_0 = const()[name = tensor<string, []>("op_4329_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4329_strides_0 = const()[name = tensor<string, []>("op_4329_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4329_pad_0 = const()[name = tensor<string, []>("op_4329_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4329_dilations_0 = const()[name = tensor<string, []>("op_4329_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4329_groups_0 = const()[name = tensor<string, []>("op_4329_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_17_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(444412672)))];
+            tensor<fp16, [1024]> blocks_17_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446509888)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4329_cast_fp16 = conv(bias = blocks_17_attn_out_bias_to_fp16, dilations = var_4329_dilations_0, groups = var_4329_groups_0, pad = var_4329_pad_0, pad_type = var_4329_pad_type_0, strides = var_4329_strides_0, weight = blocks_17_attn_out_weight_to_fp16, x = input_175_cast_fp16)[name = tensor<string, []>("op_4329_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = var_4329_cast_fp16)[name = tensor<string, []>("inputs_71_cast_fp16")];
+            tensor<int32, [1]> input_177_axes_0 = const()[name = tensor<string, []>("input_177_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_177_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_177_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446512000)))];
+            tensor<fp16, [1024]> input_177_beta_0_to_fp16 = const()[name = tensor<string, []>("input_177_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446514112)))];
+            tensor<fp16, []> var_4339_to_fp16 = const()[name = tensor<string, []>("op_4339_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = input_177_beta_0_to_fp16, epsilon = var_4339_to_fp16, gamma = input_177_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<string, []> input_179_pad_type_0 = const()[name = tensor<string, []>("input_179_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_179_strides_0 = const()[name = tensor<string, []>("input_179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_179_pad_0 = const()[name = tensor<string, []>("input_179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_179_dilations_0 = const()[name = tensor<string, []>("input_179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_179_groups_0 = const()[name = tensor<string, []>("input_179_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_17_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(446516224)))];
+            tensor<fp16, [4096]> blocks_17_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(454904896)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_179_cast_fp16 = conv(bias = blocks_17_mlp_0_bias_to_fp16, dilations = input_179_dilations_0, groups = input_179_groups_0, pad = input_179_pad_0, pad_type = input_179_pad_type_0, strides = input_179_strides_0, weight = blocks_17_mlp_0_weight_to_fp16, x = input_177_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<string, []> input_181_mode_0 = const()[name = tensor<string, []>("input_181_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_181_cast_fp16 = gelu(mode = input_181_mode_0, x = input_179_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<string, []> var_4365_pad_type_0 = const()[name = tensor<string, []>("op_4365_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4365_strides_0 = const()[name = tensor<string, []>("op_4365_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4365_pad_0 = const()[name = tensor<string, []>("op_4365_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4365_dilations_0 = const()[name = tensor<string, []>("op_4365_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4365_groups_0 = const()[name = tensor<string, []>("op_4365_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_17_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(454913152)))];
+            tensor<fp16, [1024]> blocks_17_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_17_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463301824)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4365_cast_fp16 = conv(bias = blocks_17_mlp_2_bias_to_fp16, dilations = var_4365_dilations_0, groups = var_4365_groups_0, pad = var_4365_pad_0, pad_type = var_4365_pad_type_0, strides = var_4365_strides_0, weight = blocks_17_mlp_2_weight_to_fp16, x = input_181_cast_fp16)[name = tensor<string, []>("op_4365_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = var_4365_cast_fp16)[name = tensor<string, []>("inputs_73_cast_fp16")];
+            tensor<int32, []> var_4374 = const()[name = tensor<string, []>("op_4374"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_183_axes_0 = const()[name = tensor<string, []>("input_183_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_183_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_183_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463303936)))];
+            tensor<fp16, [1024]> input_183_beta_0_to_fp16 = const()[name = tensor<string, []>("input_183_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463306048)))];
+            tensor<fp16, []> var_4390_to_fp16 = const()[name = tensor<string, []>("op_4390_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_183_cast_fp16 = layer_norm(axes = input_183_axes_0, beta = input_183_beta_0_to_fp16, epsilon = var_4390_to_fp16, gamma = input_183_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<string, []> q_37_pad_type_0 = const()[name = tensor<string, []>("q_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_37_strides_0 = const()[name = tensor<string, []>("q_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_37_pad_0 = const()[name = tensor<string, []>("q_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_37_dilations_0 = const()[name = tensor<string, []>("q_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_37_groups_0 = const()[name = tensor<string, []>("q_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4425_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4425_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(463308160)))];
+            tensor<fp16, [1024]> var_4425_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4425_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(465405376)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4425_cast_fp16 = conv(bias = var_4425_bias_0_to_fp16, dilations = q_37_dilations_0, groups = q_37_groups_0, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = q_37_strides_0, weight = var_4425_weight_0_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_4425_cast_fp16")];
+            tensor<string, []> k_37_pad_type_0 = const()[name = tensor<string, []>("k_37_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_37_strides_0 = const()[name = tensor<string, []>("k_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_37_pad_0 = const()[name = tensor<string, []>("k_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_37_dilations_0 = const()[name = tensor<string, []>("k_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_37_groups_0 = const()[name = tensor<string, []>("k_37_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(465407488)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_37_cast_fp16 = conv(dilations = k_37_dilations_0, groups = k_37_groups_0, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = k_37_strides_0, weight = blocks_18_attn_key_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("k_37_cast_fp16")];
+            tensor<string, []> var_4423_pad_type_0 = const()[name = tensor<string, []>("op_4423_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4423_strides_0 = const()[name = tensor<string, []>("op_4423_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4423_pad_0 = const()[name = tensor<string, []>("op_4423_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4423_dilations_0 = const()[name = tensor<string, []>("op_4423_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4423_groups_0 = const()[name = tensor<string, []>("op_4423_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(467504704)))];
+            tensor<fp16, [1024]> blocks_18_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(469601920)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4423_cast_fp16 = conv(bias = blocks_18_attn_value_bias_to_fp16, dilations = var_4423_dilations_0, groups = var_4423_groups_0, pad = var_4423_pad_0, pad_type = var_4423_pad_type_0, strides = var_4423_strides_0, weight = blocks_18_attn_value_weight_to_fp16, x = input_183_cast_fp16)[name = tensor<string, []>("op_4423_cast_fp16")];
+            tensor<int32, [16]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4426_axis_0 = const()[name = tensor<string, []>("op_4426_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4426_cast_fp16_15 = split(axis = var_4426_axis_0, split_sizes = tile_54, x = var_4425_cast_fp16)[name = tensor<string, []>("op_4426_cast_fp16")];
+            tensor<int32, [4]> var_4443_perm_0 = const()[name = tensor<string, []>("op_4443_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_55 = const()[name = tensor<string, []>("tile_55"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4444_axis_0 = const()[name = tensor<string, []>("op_4444_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4443_cast_fp16 = transpose(perm = var_4443_perm_0, x = k_37_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4444_cast_fp16_15 = split(axis = var_4444_axis_0, split_sizes = tile_55, x = var_4443_cast_fp16)[name = tensor<string, []>("op_4444_cast_fp16")];
+            tensor<int32, [16]> tile_56 = const()[name = tensor<string, []>("tile_56"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4461_axis_0 = const()[name = tensor<string, []>("op_4461_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4461_cast_fp16_15 = split(axis = var_4461_axis_0, split_sizes = tile_56, x = var_4423_cast_fp16)[name = tensor<string, []>("op_4461_cast_fp16")];
+            tensor<string, []> aw_577_equation_0 = const()[name = tensor<string, []>("aw_577_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_577_cast_fp16 = einsum(equation = aw_577_equation_0, values = (var_4444_cast_fp16_0, var_4426_cast_fp16_0))[name = tensor<string, []>("aw_577_cast_fp16")];
+            tensor<string, []> aw_579_equation_0 = const()[name = tensor<string, []>("aw_579_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_579_cast_fp16 = einsum(equation = aw_579_equation_0, values = (var_4444_cast_fp16_1, var_4426_cast_fp16_1))[name = tensor<string, []>("aw_579_cast_fp16")];
+            tensor<string, []> aw_581_equation_0 = const()[name = tensor<string, []>("aw_581_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_581_cast_fp16 = einsum(equation = aw_581_equation_0, values = (var_4444_cast_fp16_2, var_4426_cast_fp16_2))[name = tensor<string, []>("aw_581_cast_fp16")];
+            tensor<string, []> aw_583_equation_0 = const()[name = tensor<string, []>("aw_583_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_583_cast_fp16 = einsum(equation = aw_583_equation_0, values = (var_4444_cast_fp16_3, var_4426_cast_fp16_3))[name = tensor<string, []>("aw_583_cast_fp16")];
+            tensor<string, []> aw_585_equation_0 = const()[name = tensor<string, []>("aw_585_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_585_cast_fp16 = einsum(equation = aw_585_equation_0, values = (var_4444_cast_fp16_4, var_4426_cast_fp16_4))[name = tensor<string, []>("aw_585_cast_fp16")];
+            tensor<string, []> aw_587_equation_0 = const()[name = tensor<string, []>("aw_587_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_587_cast_fp16 = einsum(equation = aw_587_equation_0, values = (var_4444_cast_fp16_5, var_4426_cast_fp16_5))[name = tensor<string, []>("aw_587_cast_fp16")];
+            tensor<string, []> aw_589_equation_0 = const()[name = tensor<string, []>("aw_589_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_589_cast_fp16 = einsum(equation = aw_589_equation_0, values = (var_4444_cast_fp16_6, var_4426_cast_fp16_6))[name = tensor<string, []>("aw_589_cast_fp16")];
+            tensor<string, []> aw_591_equation_0 = const()[name = tensor<string, []>("aw_591_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_591_cast_fp16 = einsum(equation = aw_591_equation_0, values = (var_4444_cast_fp16_7, var_4426_cast_fp16_7))[name = tensor<string, []>("aw_591_cast_fp16")];
+            tensor<string, []> aw_593_equation_0 = const()[name = tensor<string, []>("aw_593_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_593_cast_fp16 = einsum(equation = aw_593_equation_0, values = (var_4444_cast_fp16_8, var_4426_cast_fp16_8))[name = tensor<string, []>("aw_593_cast_fp16")];
+            tensor<string, []> aw_595_equation_0 = const()[name = tensor<string, []>("aw_595_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_595_cast_fp16 = einsum(equation = aw_595_equation_0, values = (var_4444_cast_fp16_9, var_4426_cast_fp16_9))[name = tensor<string, []>("aw_595_cast_fp16")];
+            tensor<string, []> aw_597_equation_0 = const()[name = tensor<string, []>("aw_597_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_597_cast_fp16 = einsum(equation = aw_597_equation_0, values = (var_4444_cast_fp16_10, var_4426_cast_fp16_10))[name = tensor<string, []>("aw_597_cast_fp16")];
+            tensor<string, []> aw_599_equation_0 = const()[name = tensor<string, []>("aw_599_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_599_cast_fp16 = einsum(equation = aw_599_equation_0, values = (var_4444_cast_fp16_11, var_4426_cast_fp16_11))[name = tensor<string, []>("aw_599_cast_fp16")];
+            tensor<string, []> aw_601_equation_0 = const()[name = tensor<string, []>("aw_601_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_601_cast_fp16 = einsum(equation = aw_601_equation_0, values = (var_4444_cast_fp16_12, var_4426_cast_fp16_12))[name = tensor<string, []>("aw_601_cast_fp16")];
+            tensor<string, []> aw_603_equation_0 = const()[name = tensor<string, []>("aw_603_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_603_cast_fp16 = einsum(equation = aw_603_equation_0, values = (var_4444_cast_fp16_13, var_4426_cast_fp16_13))[name = tensor<string, []>("aw_603_cast_fp16")];
+            tensor<string, []> aw_605_equation_0 = const()[name = tensor<string, []>("aw_605_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_605_cast_fp16 = einsum(equation = aw_605_equation_0, values = (var_4444_cast_fp16_14, var_4426_cast_fp16_14))[name = tensor<string, []>("aw_605_cast_fp16")];
+            tensor<string, []> aw_607_equation_0 = const()[name = tensor<string, []>("aw_607_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_607_cast_fp16 = einsum(equation = aw_607_equation_0, values = (var_4444_cast_fp16_15, var_4426_cast_fp16_15))[name = tensor<string, []>("aw_607_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4510_cast_fp16 = softmax(axis = var_4374, x = aw_577_cast_fp16)[name = tensor<string, []>("op_4510_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4511_cast_fp16 = softmax(axis = var_4374, x = aw_579_cast_fp16)[name = tensor<string, []>("op_4511_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4512_cast_fp16 = softmax(axis = var_4374, x = aw_581_cast_fp16)[name = tensor<string, []>("op_4512_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4513_cast_fp16 = softmax(axis = var_4374, x = aw_583_cast_fp16)[name = tensor<string, []>("op_4513_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4514_cast_fp16 = softmax(axis = var_4374, x = aw_585_cast_fp16)[name = tensor<string, []>("op_4514_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4515_cast_fp16 = softmax(axis = var_4374, x = aw_587_cast_fp16)[name = tensor<string, []>("op_4515_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4516_cast_fp16 = softmax(axis = var_4374, x = aw_589_cast_fp16)[name = tensor<string, []>("op_4516_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4517_cast_fp16 = softmax(axis = var_4374, x = aw_591_cast_fp16)[name = tensor<string, []>("op_4517_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4518_cast_fp16 = softmax(axis = var_4374, x = aw_593_cast_fp16)[name = tensor<string, []>("op_4518_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4519_cast_fp16 = softmax(axis = var_4374, x = aw_595_cast_fp16)[name = tensor<string, []>("op_4519_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4520_cast_fp16 = softmax(axis = var_4374, x = aw_597_cast_fp16)[name = tensor<string, []>("op_4520_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4521_cast_fp16 = softmax(axis = var_4374, x = aw_599_cast_fp16)[name = tensor<string, []>("op_4521_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4522_cast_fp16 = softmax(axis = var_4374, x = aw_601_cast_fp16)[name = tensor<string, []>("op_4522_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4523_cast_fp16 = softmax(axis = var_4374, x = aw_603_cast_fp16)[name = tensor<string, []>("op_4523_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4524_cast_fp16 = softmax(axis = var_4374, x = aw_605_cast_fp16)[name = tensor<string, []>("op_4524_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4525_cast_fp16 = softmax(axis = var_4374, x = aw_607_cast_fp16)[name = tensor<string, []>("op_4525_cast_fp16")];
+            tensor<string, []> var_4527_equation_0 = const()[name = tensor<string, []>("op_4527_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4527_cast_fp16 = einsum(equation = var_4527_equation_0, values = (var_4461_cast_fp16_0, var_4510_cast_fp16))[name = tensor<string, []>("op_4527_cast_fp16")];
+            tensor<string, []> var_4529_equation_0 = const()[name = tensor<string, []>("op_4529_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4529_cast_fp16 = einsum(equation = var_4529_equation_0, values = (var_4461_cast_fp16_1, var_4511_cast_fp16))[name = tensor<string, []>("op_4529_cast_fp16")];
+            tensor<string, []> var_4531_equation_0 = const()[name = tensor<string, []>("op_4531_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4531_cast_fp16 = einsum(equation = var_4531_equation_0, values = (var_4461_cast_fp16_2, var_4512_cast_fp16))[name = tensor<string, []>("op_4531_cast_fp16")];
+            tensor<string, []> var_4533_equation_0 = const()[name = tensor<string, []>("op_4533_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4533_cast_fp16 = einsum(equation = var_4533_equation_0, values = (var_4461_cast_fp16_3, var_4513_cast_fp16))[name = tensor<string, []>("op_4533_cast_fp16")];
+            tensor<string, []> var_4535_equation_0 = const()[name = tensor<string, []>("op_4535_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4535_cast_fp16 = einsum(equation = var_4535_equation_0, values = (var_4461_cast_fp16_4, var_4514_cast_fp16))[name = tensor<string, []>("op_4535_cast_fp16")];
+            tensor<string, []> var_4537_equation_0 = const()[name = tensor<string, []>("op_4537_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4537_cast_fp16 = einsum(equation = var_4537_equation_0, values = (var_4461_cast_fp16_5, var_4515_cast_fp16))[name = tensor<string, []>("op_4537_cast_fp16")];
+            tensor<string, []> var_4539_equation_0 = const()[name = tensor<string, []>("op_4539_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4539_cast_fp16 = einsum(equation = var_4539_equation_0, values = (var_4461_cast_fp16_6, var_4516_cast_fp16))[name = tensor<string, []>("op_4539_cast_fp16")];
+            tensor<string, []> var_4541_equation_0 = const()[name = tensor<string, []>("op_4541_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4541_cast_fp16 = einsum(equation = var_4541_equation_0, values = (var_4461_cast_fp16_7, var_4517_cast_fp16))[name = tensor<string, []>("op_4541_cast_fp16")];
+            tensor<string, []> var_4543_equation_0 = const()[name = tensor<string, []>("op_4543_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4543_cast_fp16 = einsum(equation = var_4543_equation_0, values = (var_4461_cast_fp16_8, var_4518_cast_fp16))[name = tensor<string, []>("op_4543_cast_fp16")];
+            tensor<string, []> var_4545_equation_0 = const()[name = tensor<string, []>("op_4545_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4545_cast_fp16 = einsum(equation = var_4545_equation_0, values = (var_4461_cast_fp16_9, var_4519_cast_fp16))[name = tensor<string, []>("op_4545_cast_fp16")];
+            tensor<string, []> var_4547_equation_0 = const()[name = tensor<string, []>("op_4547_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4547_cast_fp16 = einsum(equation = var_4547_equation_0, values = (var_4461_cast_fp16_10, var_4520_cast_fp16))[name = tensor<string, []>("op_4547_cast_fp16")];
+            tensor<string, []> var_4549_equation_0 = const()[name = tensor<string, []>("op_4549_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4549_cast_fp16 = einsum(equation = var_4549_equation_0, values = (var_4461_cast_fp16_11, var_4521_cast_fp16))[name = tensor<string, []>("op_4549_cast_fp16")];
+            tensor<string, []> var_4551_equation_0 = const()[name = tensor<string, []>("op_4551_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4551_cast_fp16 = einsum(equation = var_4551_equation_0, values = (var_4461_cast_fp16_12, var_4522_cast_fp16))[name = tensor<string, []>("op_4551_cast_fp16")];
+            tensor<string, []> var_4553_equation_0 = const()[name = tensor<string, []>("op_4553_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4553_cast_fp16 = einsum(equation = var_4553_equation_0, values = (var_4461_cast_fp16_13, var_4523_cast_fp16))[name = tensor<string, []>("op_4553_cast_fp16")];
+            tensor<string, []> var_4555_equation_0 = const()[name = tensor<string, []>("op_4555_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4555_cast_fp16 = einsum(equation = var_4555_equation_0, values = (var_4461_cast_fp16_14, var_4524_cast_fp16))[name = tensor<string, []>("op_4555_cast_fp16")];
+            tensor<string, []> var_4557_equation_0 = const()[name = tensor<string, []>("op_4557_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4557_cast_fp16 = einsum(equation = var_4557_equation_0, values = (var_4461_cast_fp16_15, var_4525_cast_fp16))[name = tensor<string, []>("op_4557_cast_fp16")];
+            tensor<bool, []> input_185_interleave_0 = const()[name = tensor<string, []>("input_185_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_185_cast_fp16 = concat(axis = var_4374, interleave = input_185_interleave_0, values = (var_4527_cast_fp16, var_4529_cast_fp16, var_4531_cast_fp16, var_4533_cast_fp16, var_4535_cast_fp16, var_4537_cast_fp16, var_4539_cast_fp16, var_4541_cast_fp16, var_4543_cast_fp16, var_4545_cast_fp16, var_4547_cast_fp16, var_4549_cast_fp16, var_4551_cast_fp16, var_4553_cast_fp16, var_4555_cast_fp16, var_4557_cast_fp16))[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<string, []> var_4566_pad_type_0 = const()[name = tensor<string, []>("op_4566_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4566_strides_0 = const()[name = tensor<string, []>("op_4566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4566_pad_0 = const()[name = tensor<string, []>("op_4566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4566_dilations_0 = const()[name = tensor<string, []>("op_4566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4566_groups_0 = const()[name = tensor<string, []>("op_4566_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_18_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(469604032)))];
+            tensor<fp16, [1024]> blocks_18_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471701248)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4566_cast_fp16 = conv(bias = blocks_18_attn_out_bias_to_fp16, dilations = var_4566_dilations_0, groups = var_4566_groups_0, pad = var_4566_pad_0, pad_type = var_4566_pad_type_0, strides = var_4566_strides_0, weight = blocks_18_attn_out_weight_to_fp16, x = input_185_cast_fp16)[name = tensor<string, []>("op_4566_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = var_4566_cast_fp16)[name = tensor<string, []>("inputs_75_cast_fp16")];
+            tensor<int32, [1]> input_187_axes_0 = const()[name = tensor<string, []>("input_187_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_187_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471703360)))];
+            tensor<fp16, [1024]> input_187_beta_0_to_fp16 = const()[name = tensor<string, []>("input_187_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471705472)))];
+            tensor<fp16, []> var_4576_to_fp16 = const()[name = tensor<string, []>("op_4576_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_187_cast_fp16 = layer_norm(axes = input_187_axes_0, beta = input_187_beta_0_to_fp16, epsilon = var_4576_to_fp16, gamma = input_187_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor<string, []>("input_187_cast_fp16")];
+            tensor<string, []> input_189_pad_type_0 = const()[name = tensor<string, []>("input_189_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_189_strides_0 = const()[name = tensor<string, []>("input_189_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_189_pad_0 = const()[name = tensor<string, []>("input_189_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_189_dilations_0 = const()[name = tensor<string, []>("input_189_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_189_groups_0 = const()[name = tensor<string, []>("input_189_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_18_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(471707584)))];
+            tensor<fp16, [4096]> blocks_18_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(480096256)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_189_cast_fp16 = conv(bias = blocks_18_mlp_0_bias_to_fp16, dilations = input_189_dilations_0, groups = input_189_groups_0, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = input_189_strides_0, weight = blocks_18_mlp_0_weight_to_fp16, x = input_187_cast_fp16)[name = tensor<string, []>("input_189_cast_fp16")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor<string, []>("input_191_cast_fp16")];
+            tensor<string, []> var_4602_pad_type_0 = const()[name = tensor<string, []>("op_4602_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4602_strides_0 = const()[name = tensor<string, []>("op_4602_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4602_pad_0 = const()[name = tensor<string, []>("op_4602_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4602_dilations_0 = const()[name = tensor<string, []>("op_4602_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4602_groups_0 = const()[name = tensor<string, []>("op_4602_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_18_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(480104512)))];
+            tensor<fp16, [1024]> blocks_18_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_18_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488493184)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4602_cast_fp16 = conv(bias = blocks_18_mlp_2_bias_to_fp16, dilations = var_4602_dilations_0, groups = var_4602_groups_0, pad = var_4602_pad_0, pad_type = var_4602_pad_type_0, strides = var_4602_strides_0, weight = blocks_18_mlp_2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor<string, []>("op_4602_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = var_4602_cast_fp16)[name = tensor<string, []>("inputs_77_cast_fp16")];
+            tensor<int32, []> var_4611 = const()[name = tensor<string, []>("op_4611"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_193_axes_0 = const()[name = tensor<string, []>("input_193_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_193_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_193_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488495296)))];
+            tensor<fp16, [1024]> input_193_beta_0_to_fp16 = const()[name = tensor<string, []>("input_193_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488497408)))];
+            tensor<fp16, []> var_4627_to_fp16 = const()[name = tensor<string, []>("op_4627_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_193_cast_fp16 = layer_norm(axes = input_193_axes_0, beta = input_193_beta_0_to_fp16, epsilon = var_4627_to_fp16, gamma = input_193_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor<string, []>("input_193_cast_fp16")];
+            tensor<string, []> q_39_pad_type_0 = const()[name = tensor<string, []>("q_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_39_strides_0 = const()[name = tensor<string, []>("q_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_39_pad_0 = const()[name = tensor<string, []>("q_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_39_dilations_0 = const()[name = tensor<string, []>("q_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_39_groups_0 = const()[name = tensor<string, []>("q_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4662_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4662_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(488499520)))];
+            tensor<fp16, [1024]> var_4662_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4662_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490596736)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4662_cast_fp16 = conv(bias = var_4662_bias_0_to_fp16, dilations = q_39_dilations_0, groups = q_39_groups_0, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = q_39_strides_0, weight = var_4662_weight_0_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_4662_cast_fp16")];
+            tensor<string, []> k_39_pad_type_0 = const()[name = tensor<string, []>("k_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_39_strides_0 = const()[name = tensor<string, []>("k_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_39_pad_0 = const()[name = tensor<string, []>("k_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_39_dilations_0 = const()[name = tensor<string, []>("k_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_39_groups_0 = const()[name = tensor<string, []>("k_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(490598848)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_39_cast_fp16 = conv(dilations = k_39_dilations_0, groups = k_39_groups_0, pad = k_39_pad_0, pad_type = k_39_pad_type_0, strides = k_39_strides_0, weight = blocks_19_attn_key_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("k_39_cast_fp16")];
+            tensor<string, []> var_4660_pad_type_0 = const()[name = tensor<string, []>("op_4660_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4660_strides_0 = const()[name = tensor<string, []>("op_4660_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4660_pad_0 = const()[name = tensor<string, []>("op_4660_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4660_dilations_0 = const()[name = tensor<string, []>("op_4660_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4660_groups_0 = const()[name = tensor<string, []>("op_4660_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(492696064)))];
+            tensor<fp16, [1024]> blocks_19_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(494793280)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4660_cast_fp16 = conv(bias = blocks_19_attn_value_bias_to_fp16, dilations = var_4660_dilations_0, groups = var_4660_groups_0, pad = var_4660_pad_0, pad_type = var_4660_pad_type_0, strides = var_4660_strides_0, weight = blocks_19_attn_value_weight_to_fp16, x = input_193_cast_fp16)[name = tensor<string, []>("op_4660_cast_fp16")];
+            tensor<int32, [16]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4663_axis_0 = const()[name = tensor<string, []>("op_4663_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4663_cast_fp16_15 = split(axis = var_4663_axis_0, split_sizes = tile_57, x = var_4662_cast_fp16)[name = tensor<string, []>("op_4663_cast_fp16")];
+            tensor<int32, [4]> var_4680_perm_0 = const()[name = tensor<string, []>("op_4680_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4681_axis_0 = const()[name = tensor<string, []>("op_4681_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4680_cast_fp16 = transpose(perm = var_4680_perm_0, x = k_39_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4681_cast_fp16_15 = split(axis = var_4681_axis_0, split_sizes = tile_58, x = var_4680_cast_fp16)[name = tensor<string, []>("op_4681_cast_fp16")];
+            tensor<int32, [16]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4698_axis_0 = const()[name = tensor<string, []>("op_4698_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4698_cast_fp16_15 = split(axis = var_4698_axis_0, split_sizes = tile_59, x = var_4660_cast_fp16)[name = tensor<string, []>("op_4698_cast_fp16")];
+            tensor<string, []> aw_609_equation_0 = const()[name = tensor<string, []>("aw_609_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_609_cast_fp16 = einsum(equation = aw_609_equation_0, values = (var_4681_cast_fp16_0, var_4663_cast_fp16_0))[name = tensor<string, []>("aw_609_cast_fp16")];
+            tensor<string, []> aw_611_equation_0 = const()[name = tensor<string, []>("aw_611_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_611_cast_fp16 = einsum(equation = aw_611_equation_0, values = (var_4681_cast_fp16_1, var_4663_cast_fp16_1))[name = tensor<string, []>("aw_611_cast_fp16")];
+            tensor<string, []> aw_613_equation_0 = const()[name = tensor<string, []>("aw_613_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_613_cast_fp16 = einsum(equation = aw_613_equation_0, values = (var_4681_cast_fp16_2, var_4663_cast_fp16_2))[name = tensor<string, []>("aw_613_cast_fp16")];
+            tensor<string, []> aw_615_equation_0 = const()[name = tensor<string, []>("aw_615_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_615_cast_fp16 = einsum(equation = aw_615_equation_0, values = (var_4681_cast_fp16_3, var_4663_cast_fp16_3))[name = tensor<string, []>("aw_615_cast_fp16")];
+            tensor<string, []> aw_617_equation_0 = const()[name = tensor<string, []>("aw_617_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_617_cast_fp16 = einsum(equation = aw_617_equation_0, values = (var_4681_cast_fp16_4, var_4663_cast_fp16_4))[name = tensor<string, []>("aw_617_cast_fp16")];
+            tensor<string, []> aw_619_equation_0 = const()[name = tensor<string, []>("aw_619_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_619_cast_fp16 = einsum(equation = aw_619_equation_0, values = (var_4681_cast_fp16_5, var_4663_cast_fp16_5))[name = tensor<string, []>("aw_619_cast_fp16")];
+            tensor<string, []> aw_621_equation_0 = const()[name = tensor<string, []>("aw_621_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_621_cast_fp16 = einsum(equation = aw_621_equation_0, values = (var_4681_cast_fp16_6, var_4663_cast_fp16_6))[name = tensor<string, []>("aw_621_cast_fp16")];
+            tensor<string, []> aw_623_equation_0 = const()[name = tensor<string, []>("aw_623_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_623_cast_fp16 = einsum(equation = aw_623_equation_0, values = (var_4681_cast_fp16_7, var_4663_cast_fp16_7))[name = tensor<string, []>("aw_623_cast_fp16")];
+            tensor<string, []> aw_625_equation_0 = const()[name = tensor<string, []>("aw_625_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_625_cast_fp16 = einsum(equation = aw_625_equation_0, values = (var_4681_cast_fp16_8, var_4663_cast_fp16_8))[name = tensor<string, []>("aw_625_cast_fp16")];
+            tensor<string, []> aw_627_equation_0 = const()[name = tensor<string, []>("aw_627_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_627_cast_fp16 = einsum(equation = aw_627_equation_0, values = (var_4681_cast_fp16_9, var_4663_cast_fp16_9))[name = tensor<string, []>("aw_627_cast_fp16")];
+            tensor<string, []> aw_629_equation_0 = const()[name = tensor<string, []>("aw_629_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_629_cast_fp16 = einsum(equation = aw_629_equation_0, values = (var_4681_cast_fp16_10, var_4663_cast_fp16_10))[name = tensor<string, []>("aw_629_cast_fp16")];
+            tensor<string, []> aw_631_equation_0 = const()[name = tensor<string, []>("aw_631_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_631_cast_fp16 = einsum(equation = aw_631_equation_0, values = (var_4681_cast_fp16_11, var_4663_cast_fp16_11))[name = tensor<string, []>("aw_631_cast_fp16")];
+            tensor<string, []> aw_633_equation_0 = const()[name = tensor<string, []>("aw_633_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_633_cast_fp16 = einsum(equation = aw_633_equation_0, values = (var_4681_cast_fp16_12, var_4663_cast_fp16_12))[name = tensor<string, []>("aw_633_cast_fp16")];
+            tensor<string, []> aw_635_equation_0 = const()[name = tensor<string, []>("aw_635_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_635_cast_fp16 = einsum(equation = aw_635_equation_0, values = (var_4681_cast_fp16_13, var_4663_cast_fp16_13))[name = tensor<string, []>("aw_635_cast_fp16")];
+            tensor<string, []> aw_637_equation_0 = const()[name = tensor<string, []>("aw_637_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_637_cast_fp16 = einsum(equation = aw_637_equation_0, values = (var_4681_cast_fp16_14, var_4663_cast_fp16_14))[name = tensor<string, []>("aw_637_cast_fp16")];
+            tensor<string, []> aw_639_equation_0 = const()[name = tensor<string, []>("aw_639_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_639_cast_fp16 = einsum(equation = aw_639_equation_0, values = (var_4681_cast_fp16_15, var_4663_cast_fp16_15))[name = tensor<string, []>("aw_639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4747_cast_fp16 = softmax(axis = var_4611, x = aw_609_cast_fp16)[name = tensor<string, []>("op_4747_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4748_cast_fp16 = softmax(axis = var_4611, x = aw_611_cast_fp16)[name = tensor<string, []>("op_4748_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4749_cast_fp16 = softmax(axis = var_4611, x = aw_613_cast_fp16)[name = tensor<string, []>("op_4749_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4750_cast_fp16 = softmax(axis = var_4611, x = aw_615_cast_fp16)[name = tensor<string, []>("op_4750_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4751_cast_fp16 = softmax(axis = var_4611, x = aw_617_cast_fp16)[name = tensor<string, []>("op_4751_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4752_cast_fp16 = softmax(axis = var_4611, x = aw_619_cast_fp16)[name = tensor<string, []>("op_4752_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4753_cast_fp16 = softmax(axis = var_4611, x = aw_621_cast_fp16)[name = tensor<string, []>("op_4753_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4754_cast_fp16 = softmax(axis = var_4611, x = aw_623_cast_fp16)[name = tensor<string, []>("op_4754_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4755_cast_fp16 = softmax(axis = var_4611, x = aw_625_cast_fp16)[name = tensor<string, []>("op_4755_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4756_cast_fp16 = softmax(axis = var_4611, x = aw_627_cast_fp16)[name = tensor<string, []>("op_4756_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4757_cast_fp16 = softmax(axis = var_4611, x = aw_629_cast_fp16)[name = tensor<string, []>("op_4757_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4758_cast_fp16 = softmax(axis = var_4611, x = aw_631_cast_fp16)[name = tensor<string, []>("op_4758_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4759_cast_fp16 = softmax(axis = var_4611, x = aw_633_cast_fp16)[name = tensor<string, []>("op_4759_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4760_cast_fp16 = softmax(axis = var_4611, x = aw_635_cast_fp16)[name = tensor<string, []>("op_4760_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4761_cast_fp16 = softmax(axis = var_4611, x = aw_637_cast_fp16)[name = tensor<string, []>("op_4761_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4762_cast_fp16 = softmax(axis = var_4611, x = aw_639_cast_fp16)[name = tensor<string, []>("op_4762_cast_fp16")];
+            tensor<string, []> var_4764_equation_0 = const()[name = tensor<string, []>("op_4764_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4764_cast_fp16 = einsum(equation = var_4764_equation_0, values = (var_4698_cast_fp16_0, var_4747_cast_fp16))[name = tensor<string, []>("op_4764_cast_fp16")];
+            tensor<string, []> var_4766_equation_0 = const()[name = tensor<string, []>("op_4766_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4766_cast_fp16 = einsum(equation = var_4766_equation_0, values = (var_4698_cast_fp16_1, var_4748_cast_fp16))[name = tensor<string, []>("op_4766_cast_fp16")];
+            tensor<string, []> var_4768_equation_0 = const()[name = tensor<string, []>("op_4768_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4768_cast_fp16 = einsum(equation = var_4768_equation_0, values = (var_4698_cast_fp16_2, var_4749_cast_fp16))[name = tensor<string, []>("op_4768_cast_fp16")];
+            tensor<string, []> var_4770_equation_0 = const()[name = tensor<string, []>("op_4770_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4770_cast_fp16 = einsum(equation = var_4770_equation_0, values = (var_4698_cast_fp16_3, var_4750_cast_fp16))[name = tensor<string, []>("op_4770_cast_fp16")];
+            tensor<string, []> var_4772_equation_0 = const()[name = tensor<string, []>("op_4772_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4772_cast_fp16 = einsum(equation = var_4772_equation_0, values = (var_4698_cast_fp16_4, var_4751_cast_fp16))[name = tensor<string, []>("op_4772_cast_fp16")];
+            tensor<string, []> var_4774_equation_0 = const()[name = tensor<string, []>("op_4774_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4774_cast_fp16 = einsum(equation = var_4774_equation_0, values = (var_4698_cast_fp16_5, var_4752_cast_fp16))[name = tensor<string, []>("op_4774_cast_fp16")];
+            tensor<string, []> var_4776_equation_0 = const()[name = tensor<string, []>("op_4776_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4776_cast_fp16 = einsum(equation = var_4776_equation_0, values = (var_4698_cast_fp16_6, var_4753_cast_fp16))[name = tensor<string, []>("op_4776_cast_fp16")];
+            tensor<string, []> var_4778_equation_0 = const()[name = tensor<string, []>("op_4778_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4778_cast_fp16 = einsum(equation = var_4778_equation_0, values = (var_4698_cast_fp16_7, var_4754_cast_fp16))[name = tensor<string, []>("op_4778_cast_fp16")];
+            tensor<string, []> var_4780_equation_0 = const()[name = tensor<string, []>("op_4780_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4780_cast_fp16 = einsum(equation = var_4780_equation_0, values = (var_4698_cast_fp16_8, var_4755_cast_fp16))[name = tensor<string, []>("op_4780_cast_fp16")];
+            tensor<string, []> var_4782_equation_0 = const()[name = tensor<string, []>("op_4782_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4782_cast_fp16 = einsum(equation = var_4782_equation_0, values = (var_4698_cast_fp16_9, var_4756_cast_fp16))[name = tensor<string, []>("op_4782_cast_fp16")];
+            tensor<string, []> var_4784_equation_0 = const()[name = tensor<string, []>("op_4784_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4784_cast_fp16 = einsum(equation = var_4784_equation_0, values = (var_4698_cast_fp16_10, var_4757_cast_fp16))[name = tensor<string, []>("op_4784_cast_fp16")];
+            tensor<string, []> var_4786_equation_0 = const()[name = tensor<string, []>("op_4786_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4786_cast_fp16 = einsum(equation = var_4786_equation_0, values = (var_4698_cast_fp16_11, var_4758_cast_fp16))[name = tensor<string, []>("op_4786_cast_fp16")];
+            tensor<string, []> var_4788_equation_0 = const()[name = tensor<string, []>("op_4788_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4788_cast_fp16 = einsum(equation = var_4788_equation_0, values = (var_4698_cast_fp16_12, var_4759_cast_fp16))[name = tensor<string, []>("op_4788_cast_fp16")];
+            tensor<string, []> var_4790_equation_0 = const()[name = tensor<string, []>("op_4790_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4790_cast_fp16 = einsum(equation = var_4790_equation_0, values = (var_4698_cast_fp16_13, var_4760_cast_fp16))[name = tensor<string, []>("op_4790_cast_fp16")];
+            tensor<string, []> var_4792_equation_0 = const()[name = tensor<string, []>("op_4792_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4792_cast_fp16 = einsum(equation = var_4792_equation_0, values = (var_4698_cast_fp16_14, var_4761_cast_fp16))[name = tensor<string, []>("op_4792_cast_fp16")];
+            tensor<string, []> var_4794_equation_0 = const()[name = tensor<string, []>("op_4794_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_4794_cast_fp16 = einsum(equation = var_4794_equation_0, values = (var_4698_cast_fp16_15, var_4762_cast_fp16))[name = tensor<string, []>("op_4794_cast_fp16")];
+            tensor<bool, []> input_195_interleave_0 = const()[name = tensor<string, []>("input_195_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_195_cast_fp16 = concat(axis = var_4611, interleave = input_195_interleave_0, values = (var_4764_cast_fp16, var_4766_cast_fp16, var_4768_cast_fp16, var_4770_cast_fp16, var_4772_cast_fp16, var_4774_cast_fp16, var_4776_cast_fp16, var_4778_cast_fp16, var_4780_cast_fp16, var_4782_cast_fp16, var_4784_cast_fp16, var_4786_cast_fp16, var_4788_cast_fp16, var_4790_cast_fp16, var_4792_cast_fp16, var_4794_cast_fp16))[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<string, []> var_4803_pad_type_0 = const()[name = tensor<string, []>("op_4803_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4803_strides_0 = const()[name = tensor<string, []>("op_4803_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4803_pad_0 = const()[name = tensor<string, []>("op_4803_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4803_dilations_0 = const()[name = tensor<string, []>("op_4803_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4803_groups_0 = const()[name = tensor<string, []>("op_4803_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_19_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(494795392)))];
+            tensor<fp16, [1024]> blocks_19_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496892608)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4803_cast_fp16 = conv(bias = blocks_19_attn_out_bias_to_fp16, dilations = var_4803_dilations_0, groups = var_4803_groups_0, pad = var_4803_pad_0, pad_type = var_4803_pad_type_0, strides = var_4803_strides_0, weight = blocks_19_attn_out_weight_to_fp16, x = input_195_cast_fp16)[name = tensor<string, []>("op_4803_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = var_4803_cast_fp16)[name = tensor<string, []>("inputs_79_cast_fp16")];
+            tensor<int32, [1]> input_197_axes_0 = const()[name = tensor<string, []>("input_197_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_197_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_197_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496894720)))];
+            tensor<fp16, [1024]> input_197_beta_0_to_fp16 = const()[name = tensor<string, []>("input_197_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496896832)))];
+            tensor<fp16, []> var_4813_to_fp16 = const()[name = tensor<string, []>("op_4813_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_197_cast_fp16 = layer_norm(axes = input_197_axes_0, beta = input_197_beta_0_to_fp16, epsilon = var_4813_to_fp16, gamma = input_197_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<string, []> input_199_pad_type_0 = const()[name = tensor<string, []>("input_199_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_199_strides_0 = const()[name = tensor<string, []>("input_199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_199_pad_0 = const()[name = tensor<string, []>("input_199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_199_dilations_0 = const()[name = tensor<string, []>("input_199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_199_groups_0 = const()[name = tensor<string, []>("input_199_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_19_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(496898944)))];
+            tensor<fp16, [4096]> blocks_19_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(505287616)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_199_cast_fp16 = conv(bias = blocks_19_mlp_0_bias_to_fp16, dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = blocks_19_mlp_0_weight_to_fp16, x = input_197_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<string, []> input_201_mode_0 = const()[name = tensor<string, []>("input_201_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_201_cast_fp16 = gelu(mode = input_201_mode_0, x = input_199_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<string, []> var_4839_pad_type_0 = const()[name = tensor<string, []>("op_4839_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4839_strides_0 = const()[name = tensor<string, []>("op_4839_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4839_pad_0 = const()[name = tensor<string, []>("op_4839_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4839_dilations_0 = const()[name = tensor<string, []>("op_4839_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4839_groups_0 = const()[name = tensor<string, []>("op_4839_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_19_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(505295872)))];
+            tensor<fp16, [1024]> blocks_19_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_19_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513684544)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4839_cast_fp16 = conv(bias = blocks_19_mlp_2_bias_to_fp16, dilations = var_4839_dilations_0, groups = var_4839_groups_0, pad = var_4839_pad_0, pad_type = var_4839_pad_type_0, strides = var_4839_strides_0, weight = blocks_19_mlp_2_weight_to_fp16, x = input_201_cast_fp16)[name = tensor<string, []>("op_4839_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = var_4839_cast_fp16)[name = tensor<string, []>("inputs_81_cast_fp16")];
+            tensor<int32, []> var_4848 = const()[name = tensor<string, []>("op_4848"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_203_axes_0 = const()[name = tensor<string, []>("input_203_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_203_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513686656)))];
+            tensor<fp16, [1024]> input_203_beta_0_to_fp16 = const()[name = tensor<string, []>("input_203_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513688768)))];
+            tensor<fp16, []> var_4864_to_fp16 = const()[name = tensor<string, []>("op_4864_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_203_cast_fp16 = layer_norm(axes = input_203_axes_0, beta = input_203_beta_0_to_fp16, epsilon = var_4864_to_fp16, gamma = input_203_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<string, []> q_41_pad_type_0 = const()[name = tensor<string, []>("q_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_41_strides_0 = const()[name = tensor<string, []>("q_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_41_pad_0 = const()[name = tensor<string, []>("q_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_41_dilations_0 = const()[name = tensor<string, []>("q_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_41_groups_0 = const()[name = tensor<string, []>("q_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_4899_weight_0_to_fp16 = const()[name = tensor<string, []>("op_4899_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(513690880)))];
+            tensor<fp16, [1024]> var_4899_bias_0_to_fp16 = const()[name = tensor<string, []>("op_4899_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(515788096)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4899_cast_fp16 = conv(bias = var_4899_bias_0_to_fp16, dilations = q_41_dilations_0, groups = q_41_groups_0, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = q_41_strides_0, weight = var_4899_weight_0_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_4899_cast_fp16")];
+            tensor<string, []> k_41_pad_type_0 = const()[name = tensor<string, []>("k_41_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_41_strides_0 = const()[name = tensor<string, []>("k_41_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_41_pad_0 = const()[name = tensor<string, []>("k_41_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_41_dilations_0 = const()[name = tensor<string, []>("k_41_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_41_groups_0 = const()[name = tensor<string, []>("k_41_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(515790208)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_41_cast_fp16 = conv(dilations = k_41_dilations_0, groups = k_41_groups_0, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = k_41_strides_0, weight = blocks_20_attn_key_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("k_41_cast_fp16")];
+            tensor<string, []> var_4897_pad_type_0 = const()[name = tensor<string, []>("op_4897_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_4897_strides_0 = const()[name = tensor<string, []>("op_4897_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4897_pad_0 = const()[name = tensor<string, []>("op_4897_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4897_dilations_0 = const()[name = tensor<string, []>("op_4897_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_4897_groups_0 = const()[name = tensor<string, []>("op_4897_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(517887424)))];
+            tensor<fp16, [1024]> blocks_20_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(519984640)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_4897_cast_fp16 = conv(bias = blocks_20_attn_value_bias_to_fp16, dilations = var_4897_dilations_0, groups = var_4897_groups_0, pad = var_4897_pad_0, pad_type = var_4897_pad_type_0, strides = var_4897_strides_0, weight = blocks_20_attn_value_weight_to_fp16, x = input_203_cast_fp16)[name = tensor<string, []>("op_4897_cast_fp16")];
+            tensor<int32, [16]> tile_60 = const()[name = tensor<string, []>("tile_60"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4900_axis_0 = const()[name = tensor<string, []>("op_4900_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4900_cast_fp16_15 = split(axis = var_4900_axis_0, split_sizes = tile_60, x = var_4899_cast_fp16)[name = tensor<string, []>("op_4900_cast_fp16")];
+            tensor<int32, [4]> var_4917_perm_0 = const()[name = tensor<string, []>("op_4917_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_61 = const()[name = tensor<string, []>("tile_61"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4918_axis_0 = const()[name = tensor<string, []>("op_4918_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_4917_cast_fp16 = transpose(perm = var_4917_perm_0, x = k_41_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_4918_cast_fp16_15 = split(axis = var_4918_axis_0, split_sizes = tile_61, x = var_4917_cast_fp16)[name = tensor<string, []>("op_4918_cast_fp16")];
+            tensor<int32, [16]> tile_62 = const()[name = tensor<string, []>("tile_62"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_4935_axis_0 = const()[name = tensor<string, []>("op_4935_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_4935_cast_fp16_15 = split(axis = var_4935_axis_0, split_sizes = tile_62, x = var_4897_cast_fp16)[name = tensor<string, []>("op_4935_cast_fp16")];
+            tensor<string, []> aw_641_equation_0 = const()[name = tensor<string, []>("aw_641_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_641_cast_fp16 = einsum(equation = aw_641_equation_0, values = (var_4918_cast_fp16_0, var_4900_cast_fp16_0))[name = tensor<string, []>("aw_641_cast_fp16")];
+            tensor<string, []> aw_643_equation_0 = const()[name = tensor<string, []>("aw_643_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_643_cast_fp16 = einsum(equation = aw_643_equation_0, values = (var_4918_cast_fp16_1, var_4900_cast_fp16_1))[name = tensor<string, []>("aw_643_cast_fp16")];
+            tensor<string, []> aw_645_equation_0 = const()[name = tensor<string, []>("aw_645_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_645_cast_fp16 = einsum(equation = aw_645_equation_0, values = (var_4918_cast_fp16_2, var_4900_cast_fp16_2))[name = tensor<string, []>("aw_645_cast_fp16")];
+            tensor<string, []> aw_647_equation_0 = const()[name = tensor<string, []>("aw_647_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_647_cast_fp16 = einsum(equation = aw_647_equation_0, values = (var_4918_cast_fp16_3, var_4900_cast_fp16_3))[name = tensor<string, []>("aw_647_cast_fp16")];
+            tensor<string, []> aw_649_equation_0 = const()[name = tensor<string, []>("aw_649_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_649_cast_fp16 = einsum(equation = aw_649_equation_0, values = (var_4918_cast_fp16_4, var_4900_cast_fp16_4))[name = tensor<string, []>("aw_649_cast_fp16")];
+            tensor<string, []> aw_651_equation_0 = const()[name = tensor<string, []>("aw_651_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_651_cast_fp16 = einsum(equation = aw_651_equation_0, values = (var_4918_cast_fp16_5, var_4900_cast_fp16_5))[name = tensor<string, []>("aw_651_cast_fp16")];
+            tensor<string, []> aw_653_equation_0 = const()[name = tensor<string, []>("aw_653_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_653_cast_fp16 = einsum(equation = aw_653_equation_0, values = (var_4918_cast_fp16_6, var_4900_cast_fp16_6))[name = tensor<string, []>("aw_653_cast_fp16")];
+            tensor<string, []> aw_655_equation_0 = const()[name = tensor<string, []>("aw_655_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_655_cast_fp16 = einsum(equation = aw_655_equation_0, values = (var_4918_cast_fp16_7, var_4900_cast_fp16_7))[name = tensor<string, []>("aw_655_cast_fp16")];
+            tensor<string, []> aw_657_equation_0 = const()[name = tensor<string, []>("aw_657_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_657_cast_fp16 = einsum(equation = aw_657_equation_0, values = (var_4918_cast_fp16_8, var_4900_cast_fp16_8))[name = tensor<string, []>("aw_657_cast_fp16")];
+            tensor<string, []> aw_659_equation_0 = const()[name = tensor<string, []>("aw_659_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_659_cast_fp16 = einsum(equation = aw_659_equation_0, values = (var_4918_cast_fp16_9, var_4900_cast_fp16_9))[name = tensor<string, []>("aw_659_cast_fp16")];
+            tensor<string, []> aw_661_equation_0 = const()[name = tensor<string, []>("aw_661_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_661_cast_fp16 = einsum(equation = aw_661_equation_0, values = (var_4918_cast_fp16_10, var_4900_cast_fp16_10))[name = tensor<string, []>("aw_661_cast_fp16")];
+            tensor<string, []> aw_663_equation_0 = const()[name = tensor<string, []>("aw_663_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_663_cast_fp16 = einsum(equation = aw_663_equation_0, values = (var_4918_cast_fp16_11, var_4900_cast_fp16_11))[name = tensor<string, []>("aw_663_cast_fp16")];
+            tensor<string, []> aw_665_equation_0 = const()[name = tensor<string, []>("aw_665_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_665_cast_fp16 = einsum(equation = aw_665_equation_0, values = (var_4918_cast_fp16_12, var_4900_cast_fp16_12))[name = tensor<string, []>("aw_665_cast_fp16")];
+            tensor<string, []> aw_667_equation_0 = const()[name = tensor<string, []>("aw_667_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_667_cast_fp16 = einsum(equation = aw_667_equation_0, values = (var_4918_cast_fp16_13, var_4900_cast_fp16_13))[name = tensor<string, []>("aw_667_cast_fp16")];
+            tensor<string, []> aw_669_equation_0 = const()[name = tensor<string, []>("aw_669_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_669_cast_fp16 = einsum(equation = aw_669_equation_0, values = (var_4918_cast_fp16_14, var_4900_cast_fp16_14))[name = tensor<string, []>("aw_669_cast_fp16")];
+            tensor<string, []> aw_671_equation_0 = const()[name = tensor<string, []>("aw_671_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_671_cast_fp16 = einsum(equation = aw_671_equation_0, values = (var_4918_cast_fp16_15, var_4900_cast_fp16_15))[name = tensor<string, []>("aw_671_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4984_cast_fp16 = softmax(axis = var_4848, x = aw_641_cast_fp16)[name = tensor<string, []>("op_4984_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4985_cast_fp16 = softmax(axis = var_4848, x = aw_643_cast_fp16)[name = tensor<string, []>("op_4985_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4986_cast_fp16 = softmax(axis = var_4848, x = aw_645_cast_fp16)[name = tensor<string, []>("op_4986_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4987_cast_fp16 = softmax(axis = var_4848, x = aw_647_cast_fp16)[name = tensor<string, []>("op_4987_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4988_cast_fp16 = softmax(axis = var_4848, x = aw_649_cast_fp16)[name = tensor<string, []>("op_4988_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4989_cast_fp16 = softmax(axis = var_4848, x = aw_651_cast_fp16)[name = tensor<string, []>("op_4989_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4990_cast_fp16 = softmax(axis = var_4848, x = aw_653_cast_fp16)[name = tensor<string, []>("op_4990_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4991_cast_fp16 = softmax(axis = var_4848, x = aw_655_cast_fp16)[name = tensor<string, []>("op_4991_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4992_cast_fp16 = softmax(axis = var_4848, x = aw_657_cast_fp16)[name = tensor<string, []>("op_4992_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4993_cast_fp16 = softmax(axis = var_4848, x = aw_659_cast_fp16)[name = tensor<string, []>("op_4993_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4994_cast_fp16 = softmax(axis = var_4848, x = aw_661_cast_fp16)[name = tensor<string, []>("op_4994_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4995_cast_fp16 = softmax(axis = var_4848, x = aw_663_cast_fp16)[name = tensor<string, []>("op_4995_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4996_cast_fp16 = softmax(axis = var_4848, x = aw_665_cast_fp16)[name = tensor<string, []>("op_4996_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4997_cast_fp16 = softmax(axis = var_4848, x = aw_667_cast_fp16)[name = tensor<string, []>("op_4997_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4998_cast_fp16 = softmax(axis = var_4848, x = aw_669_cast_fp16)[name = tensor<string, []>("op_4998_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_4999_cast_fp16 = softmax(axis = var_4848, x = aw_671_cast_fp16)[name = tensor<string, []>("op_4999_cast_fp16")];
+            tensor<string, []> var_5001_equation_0 = const()[name = tensor<string, []>("op_5001_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5001_cast_fp16 = einsum(equation = var_5001_equation_0, values = (var_4935_cast_fp16_0, var_4984_cast_fp16))[name = tensor<string, []>("op_5001_cast_fp16")];
+            tensor<string, []> var_5003_equation_0 = const()[name = tensor<string, []>("op_5003_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5003_cast_fp16 = einsum(equation = var_5003_equation_0, values = (var_4935_cast_fp16_1, var_4985_cast_fp16))[name = tensor<string, []>("op_5003_cast_fp16")];
+            tensor<string, []> var_5005_equation_0 = const()[name = tensor<string, []>("op_5005_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5005_cast_fp16 = einsum(equation = var_5005_equation_0, values = (var_4935_cast_fp16_2, var_4986_cast_fp16))[name = tensor<string, []>("op_5005_cast_fp16")];
+            tensor<string, []> var_5007_equation_0 = const()[name = tensor<string, []>("op_5007_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5007_cast_fp16 = einsum(equation = var_5007_equation_0, values = (var_4935_cast_fp16_3, var_4987_cast_fp16))[name = tensor<string, []>("op_5007_cast_fp16")];
+            tensor<string, []> var_5009_equation_0 = const()[name = tensor<string, []>("op_5009_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5009_cast_fp16 = einsum(equation = var_5009_equation_0, values = (var_4935_cast_fp16_4, var_4988_cast_fp16))[name = tensor<string, []>("op_5009_cast_fp16")];
+            tensor<string, []> var_5011_equation_0 = const()[name = tensor<string, []>("op_5011_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5011_cast_fp16 = einsum(equation = var_5011_equation_0, values = (var_4935_cast_fp16_5, var_4989_cast_fp16))[name = tensor<string, []>("op_5011_cast_fp16")];
+            tensor<string, []> var_5013_equation_0 = const()[name = tensor<string, []>("op_5013_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5013_cast_fp16 = einsum(equation = var_5013_equation_0, values = (var_4935_cast_fp16_6, var_4990_cast_fp16))[name = tensor<string, []>("op_5013_cast_fp16")];
+            tensor<string, []> var_5015_equation_0 = const()[name = tensor<string, []>("op_5015_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5015_cast_fp16 = einsum(equation = var_5015_equation_0, values = (var_4935_cast_fp16_7, var_4991_cast_fp16))[name = tensor<string, []>("op_5015_cast_fp16")];
+            tensor<string, []> var_5017_equation_0 = const()[name = tensor<string, []>("op_5017_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5017_cast_fp16 = einsum(equation = var_5017_equation_0, values = (var_4935_cast_fp16_8, var_4992_cast_fp16))[name = tensor<string, []>("op_5017_cast_fp16")];
+            tensor<string, []> var_5019_equation_0 = const()[name = tensor<string, []>("op_5019_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5019_cast_fp16 = einsum(equation = var_5019_equation_0, values = (var_4935_cast_fp16_9, var_4993_cast_fp16))[name = tensor<string, []>("op_5019_cast_fp16")];
+            tensor<string, []> var_5021_equation_0 = const()[name = tensor<string, []>("op_5021_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5021_cast_fp16 = einsum(equation = var_5021_equation_0, values = (var_4935_cast_fp16_10, var_4994_cast_fp16))[name = tensor<string, []>("op_5021_cast_fp16")];
+            tensor<string, []> var_5023_equation_0 = const()[name = tensor<string, []>("op_5023_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5023_cast_fp16 = einsum(equation = var_5023_equation_0, values = (var_4935_cast_fp16_11, var_4995_cast_fp16))[name = tensor<string, []>("op_5023_cast_fp16")];
+            tensor<string, []> var_5025_equation_0 = const()[name = tensor<string, []>("op_5025_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5025_cast_fp16 = einsum(equation = var_5025_equation_0, values = (var_4935_cast_fp16_12, var_4996_cast_fp16))[name = tensor<string, []>("op_5025_cast_fp16")];
+            tensor<string, []> var_5027_equation_0 = const()[name = tensor<string, []>("op_5027_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5027_cast_fp16 = einsum(equation = var_5027_equation_0, values = (var_4935_cast_fp16_13, var_4997_cast_fp16))[name = tensor<string, []>("op_5027_cast_fp16")];
+            tensor<string, []> var_5029_equation_0 = const()[name = tensor<string, []>("op_5029_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5029_cast_fp16 = einsum(equation = var_5029_equation_0, values = (var_4935_cast_fp16_14, var_4998_cast_fp16))[name = tensor<string, []>("op_5029_cast_fp16")];
+            tensor<string, []> var_5031_equation_0 = const()[name = tensor<string, []>("op_5031_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5031_cast_fp16 = einsum(equation = var_5031_equation_0, values = (var_4935_cast_fp16_15, var_4999_cast_fp16))[name = tensor<string, []>("op_5031_cast_fp16")];
+            tensor<bool, []> input_205_interleave_0 = const()[name = tensor<string, []>("input_205_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_205_cast_fp16 = concat(axis = var_4848, interleave = input_205_interleave_0, values = (var_5001_cast_fp16, var_5003_cast_fp16, var_5005_cast_fp16, var_5007_cast_fp16, var_5009_cast_fp16, var_5011_cast_fp16, var_5013_cast_fp16, var_5015_cast_fp16, var_5017_cast_fp16, var_5019_cast_fp16, var_5021_cast_fp16, var_5023_cast_fp16, var_5025_cast_fp16, var_5027_cast_fp16, var_5029_cast_fp16, var_5031_cast_fp16))[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<string, []> var_5040_pad_type_0 = const()[name = tensor<string, []>("op_5040_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5040_strides_0 = const()[name = tensor<string, []>("op_5040_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5040_pad_0 = const()[name = tensor<string, []>("op_5040_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5040_dilations_0 = const()[name = tensor<string, []>("op_5040_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5040_groups_0 = const()[name = tensor<string, []>("op_5040_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_20_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(519986752)))];
+            tensor<fp16, [1024]> blocks_20_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522083968)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5040_cast_fp16 = conv(bias = blocks_20_attn_out_bias_to_fp16, dilations = var_5040_dilations_0, groups = var_5040_groups_0, pad = var_5040_pad_0, pad_type = var_5040_pad_type_0, strides = var_5040_strides_0, weight = blocks_20_attn_out_weight_to_fp16, x = input_205_cast_fp16)[name = tensor<string, []>("op_5040_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = var_5040_cast_fp16)[name = tensor<string, []>("inputs_83_cast_fp16")];
+            tensor<int32, [1]> input_207_axes_0 = const()[name = tensor<string, []>("input_207_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_207_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_207_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522086080)))];
+            tensor<fp16, [1024]> input_207_beta_0_to_fp16 = const()[name = tensor<string, []>("input_207_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522088192)))];
+            tensor<fp16, []> var_5050_to_fp16 = const()[name = tensor<string, []>("op_5050_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_207_cast_fp16 = layer_norm(axes = input_207_axes_0, beta = input_207_beta_0_to_fp16, epsilon = var_5050_to_fp16, gamma = input_207_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<string, []> input_209_pad_type_0 = const()[name = tensor<string, []>("input_209_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_209_strides_0 = const()[name = tensor<string, []>("input_209_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_209_pad_0 = const()[name = tensor<string, []>("input_209_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_209_dilations_0 = const()[name = tensor<string, []>("input_209_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_209_groups_0 = const()[name = tensor<string, []>("input_209_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_20_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(522090304)))];
+            tensor<fp16, [4096]> blocks_20_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(530478976)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_209_cast_fp16 = conv(bias = blocks_20_mlp_0_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = blocks_20_mlp_0_weight_to_fp16, x = input_207_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<string, []> input_211_mode_0 = const()[name = tensor<string, []>("input_211_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_211_cast_fp16 = gelu(mode = input_211_mode_0, x = input_209_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<string, []> var_5076_pad_type_0 = const()[name = tensor<string, []>("op_5076_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5076_strides_0 = const()[name = tensor<string, []>("op_5076_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5076_pad_0 = const()[name = tensor<string, []>("op_5076_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5076_dilations_0 = const()[name = tensor<string, []>("op_5076_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5076_groups_0 = const()[name = tensor<string, []>("op_5076_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_20_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(530487232)))];
+            tensor<fp16, [1024]> blocks_20_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_20_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538875904)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5076_cast_fp16 = conv(bias = blocks_20_mlp_2_bias_to_fp16, dilations = var_5076_dilations_0, groups = var_5076_groups_0, pad = var_5076_pad_0, pad_type = var_5076_pad_type_0, strides = var_5076_strides_0, weight = blocks_20_mlp_2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor<string, []>("op_5076_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = var_5076_cast_fp16)[name = tensor<string, []>("inputs_85_cast_fp16")];
+            tensor<int32, []> var_5085 = const()[name = tensor<string, []>("op_5085"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_213_axes_0 = const()[name = tensor<string, []>("input_213_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_213_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_213_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538878016)))];
+            tensor<fp16, [1024]> input_213_beta_0_to_fp16 = const()[name = tensor<string, []>("input_213_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538880128)))];
+            tensor<fp16, []> var_5101_to_fp16 = const()[name = tensor<string, []>("op_5101_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_213_cast_fp16 = layer_norm(axes = input_213_axes_0, beta = input_213_beta_0_to_fp16, epsilon = var_5101_to_fp16, gamma = input_213_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<string, []> q_43_pad_type_0 = const()[name = tensor<string, []>("q_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_43_strides_0 = const()[name = tensor<string, []>("q_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_43_pad_0 = const()[name = tensor<string, []>("q_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_43_dilations_0 = const()[name = tensor<string, []>("q_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_43_groups_0 = const()[name = tensor<string, []>("q_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5136_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5136_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(538882240)))];
+            tensor<fp16, [1024]> var_5136_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5136_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(540979456)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5136_cast_fp16 = conv(bias = var_5136_bias_0_to_fp16, dilations = q_43_dilations_0, groups = q_43_groups_0, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = q_43_strides_0, weight = var_5136_weight_0_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5136_cast_fp16")];
+            tensor<string, []> k_43_pad_type_0 = const()[name = tensor<string, []>("k_43_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_43_strides_0 = const()[name = tensor<string, []>("k_43_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_43_pad_0 = const()[name = tensor<string, []>("k_43_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_43_dilations_0 = const()[name = tensor<string, []>("k_43_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_43_groups_0 = const()[name = tensor<string, []>("k_43_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(540981568)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_43_cast_fp16 = conv(dilations = k_43_dilations_0, groups = k_43_groups_0, pad = k_43_pad_0, pad_type = k_43_pad_type_0, strides = k_43_strides_0, weight = blocks_21_attn_key_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("k_43_cast_fp16")];
+            tensor<string, []> var_5134_pad_type_0 = const()[name = tensor<string, []>("op_5134_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5134_strides_0 = const()[name = tensor<string, []>("op_5134_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5134_pad_0 = const()[name = tensor<string, []>("op_5134_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5134_dilations_0 = const()[name = tensor<string, []>("op_5134_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5134_groups_0 = const()[name = tensor<string, []>("op_5134_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(543078784)))];
+            tensor<fp16, [1024]> blocks_21_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(545176000)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5134_cast_fp16 = conv(bias = blocks_21_attn_value_bias_to_fp16, dilations = var_5134_dilations_0, groups = var_5134_groups_0, pad = var_5134_pad_0, pad_type = var_5134_pad_type_0, strides = var_5134_strides_0, weight = blocks_21_attn_value_weight_to_fp16, x = input_213_cast_fp16)[name = tensor<string, []>("op_5134_cast_fp16")];
+            tensor<int32, [16]> tile_63 = const()[name = tensor<string, []>("tile_63"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5137_axis_0 = const()[name = tensor<string, []>("op_5137_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5137_cast_fp16_15 = split(axis = var_5137_axis_0, split_sizes = tile_63, x = var_5136_cast_fp16)[name = tensor<string, []>("op_5137_cast_fp16")];
+            tensor<int32, [4]> var_5154_perm_0 = const()[name = tensor<string, []>("op_5154_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_64 = const()[name = tensor<string, []>("tile_64"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5155_axis_0 = const()[name = tensor<string, []>("op_5155_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5154_cast_fp16 = transpose(perm = var_5154_perm_0, x = k_43_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5155_cast_fp16_15 = split(axis = var_5155_axis_0, split_sizes = tile_64, x = var_5154_cast_fp16)[name = tensor<string, []>("op_5155_cast_fp16")];
+            tensor<int32, [16]> tile_65 = const()[name = tensor<string, []>("tile_65"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5172_axis_0 = const()[name = tensor<string, []>("op_5172_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5172_cast_fp16_15 = split(axis = var_5172_axis_0, split_sizes = tile_65, x = var_5134_cast_fp16)[name = tensor<string, []>("op_5172_cast_fp16")];
+            tensor<string, []> aw_673_equation_0 = const()[name = tensor<string, []>("aw_673_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_673_cast_fp16 = einsum(equation = aw_673_equation_0, values = (var_5155_cast_fp16_0, var_5137_cast_fp16_0))[name = tensor<string, []>("aw_673_cast_fp16")];
+            tensor<string, []> aw_675_equation_0 = const()[name = tensor<string, []>("aw_675_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_675_cast_fp16 = einsum(equation = aw_675_equation_0, values = (var_5155_cast_fp16_1, var_5137_cast_fp16_1))[name = tensor<string, []>("aw_675_cast_fp16")];
+            tensor<string, []> aw_677_equation_0 = const()[name = tensor<string, []>("aw_677_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_677_cast_fp16 = einsum(equation = aw_677_equation_0, values = (var_5155_cast_fp16_2, var_5137_cast_fp16_2))[name = tensor<string, []>("aw_677_cast_fp16")];
+            tensor<string, []> aw_679_equation_0 = const()[name = tensor<string, []>("aw_679_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_679_cast_fp16 = einsum(equation = aw_679_equation_0, values = (var_5155_cast_fp16_3, var_5137_cast_fp16_3))[name = tensor<string, []>("aw_679_cast_fp16")];
+            tensor<string, []> aw_681_equation_0 = const()[name = tensor<string, []>("aw_681_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_681_cast_fp16 = einsum(equation = aw_681_equation_0, values = (var_5155_cast_fp16_4, var_5137_cast_fp16_4))[name = tensor<string, []>("aw_681_cast_fp16")];
+            tensor<string, []> aw_683_equation_0 = const()[name = tensor<string, []>("aw_683_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_683_cast_fp16 = einsum(equation = aw_683_equation_0, values = (var_5155_cast_fp16_5, var_5137_cast_fp16_5))[name = tensor<string, []>("aw_683_cast_fp16")];
+            tensor<string, []> aw_685_equation_0 = const()[name = tensor<string, []>("aw_685_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_685_cast_fp16 = einsum(equation = aw_685_equation_0, values = (var_5155_cast_fp16_6, var_5137_cast_fp16_6))[name = tensor<string, []>("aw_685_cast_fp16")];
+            tensor<string, []> aw_687_equation_0 = const()[name = tensor<string, []>("aw_687_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_687_cast_fp16 = einsum(equation = aw_687_equation_0, values = (var_5155_cast_fp16_7, var_5137_cast_fp16_7))[name = tensor<string, []>("aw_687_cast_fp16")];
+            tensor<string, []> aw_689_equation_0 = const()[name = tensor<string, []>("aw_689_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_689_cast_fp16 = einsum(equation = aw_689_equation_0, values = (var_5155_cast_fp16_8, var_5137_cast_fp16_8))[name = tensor<string, []>("aw_689_cast_fp16")];
+            tensor<string, []> aw_691_equation_0 = const()[name = tensor<string, []>("aw_691_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_691_cast_fp16 = einsum(equation = aw_691_equation_0, values = (var_5155_cast_fp16_9, var_5137_cast_fp16_9))[name = tensor<string, []>("aw_691_cast_fp16")];
+            tensor<string, []> aw_693_equation_0 = const()[name = tensor<string, []>("aw_693_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_693_cast_fp16 = einsum(equation = aw_693_equation_0, values = (var_5155_cast_fp16_10, var_5137_cast_fp16_10))[name = tensor<string, []>("aw_693_cast_fp16")];
+            tensor<string, []> aw_695_equation_0 = const()[name = tensor<string, []>("aw_695_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_695_cast_fp16 = einsum(equation = aw_695_equation_0, values = (var_5155_cast_fp16_11, var_5137_cast_fp16_11))[name = tensor<string, []>("aw_695_cast_fp16")];
+            tensor<string, []> aw_697_equation_0 = const()[name = tensor<string, []>("aw_697_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_697_cast_fp16 = einsum(equation = aw_697_equation_0, values = (var_5155_cast_fp16_12, var_5137_cast_fp16_12))[name = tensor<string, []>("aw_697_cast_fp16")];
+            tensor<string, []> aw_699_equation_0 = const()[name = tensor<string, []>("aw_699_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_699_cast_fp16 = einsum(equation = aw_699_equation_0, values = (var_5155_cast_fp16_13, var_5137_cast_fp16_13))[name = tensor<string, []>("aw_699_cast_fp16")];
+            tensor<string, []> aw_701_equation_0 = const()[name = tensor<string, []>("aw_701_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_701_cast_fp16 = einsum(equation = aw_701_equation_0, values = (var_5155_cast_fp16_14, var_5137_cast_fp16_14))[name = tensor<string, []>("aw_701_cast_fp16")];
+            tensor<string, []> aw_703_equation_0 = const()[name = tensor<string, []>("aw_703_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_703_cast_fp16 = einsum(equation = aw_703_equation_0, values = (var_5155_cast_fp16_15, var_5137_cast_fp16_15))[name = tensor<string, []>("aw_703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5221_cast_fp16 = softmax(axis = var_5085, x = aw_673_cast_fp16)[name = tensor<string, []>("op_5221_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5222_cast_fp16 = softmax(axis = var_5085, x = aw_675_cast_fp16)[name = tensor<string, []>("op_5222_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5223_cast_fp16 = softmax(axis = var_5085, x = aw_677_cast_fp16)[name = tensor<string, []>("op_5223_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5224_cast_fp16 = softmax(axis = var_5085, x = aw_679_cast_fp16)[name = tensor<string, []>("op_5224_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5225_cast_fp16 = softmax(axis = var_5085, x = aw_681_cast_fp16)[name = tensor<string, []>("op_5225_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5226_cast_fp16 = softmax(axis = var_5085, x = aw_683_cast_fp16)[name = tensor<string, []>("op_5226_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5227_cast_fp16 = softmax(axis = var_5085, x = aw_685_cast_fp16)[name = tensor<string, []>("op_5227_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5228_cast_fp16 = softmax(axis = var_5085, x = aw_687_cast_fp16)[name = tensor<string, []>("op_5228_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5229_cast_fp16 = softmax(axis = var_5085, x = aw_689_cast_fp16)[name = tensor<string, []>("op_5229_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5230_cast_fp16 = softmax(axis = var_5085, x = aw_691_cast_fp16)[name = tensor<string, []>("op_5230_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5231_cast_fp16 = softmax(axis = var_5085, x = aw_693_cast_fp16)[name = tensor<string, []>("op_5231_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5232_cast_fp16 = softmax(axis = var_5085, x = aw_695_cast_fp16)[name = tensor<string, []>("op_5232_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5233_cast_fp16 = softmax(axis = var_5085, x = aw_697_cast_fp16)[name = tensor<string, []>("op_5233_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5234_cast_fp16 = softmax(axis = var_5085, x = aw_699_cast_fp16)[name = tensor<string, []>("op_5234_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5235_cast_fp16 = softmax(axis = var_5085, x = aw_701_cast_fp16)[name = tensor<string, []>("op_5235_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5236_cast_fp16 = softmax(axis = var_5085, x = aw_703_cast_fp16)[name = tensor<string, []>("op_5236_cast_fp16")];
+            tensor<string, []> var_5238_equation_0 = const()[name = tensor<string, []>("op_5238_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5238_cast_fp16 = einsum(equation = var_5238_equation_0, values = (var_5172_cast_fp16_0, var_5221_cast_fp16))[name = tensor<string, []>("op_5238_cast_fp16")];
+            tensor<string, []> var_5240_equation_0 = const()[name = tensor<string, []>("op_5240_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5240_cast_fp16 = einsum(equation = var_5240_equation_0, values = (var_5172_cast_fp16_1, var_5222_cast_fp16))[name = tensor<string, []>("op_5240_cast_fp16")];
+            tensor<string, []> var_5242_equation_0 = const()[name = tensor<string, []>("op_5242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5242_cast_fp16 = einsum(equation = var_5242_equation_0, values = (var_5172_cast_fp16_2, var_5223_cast_fp16))[name = tensor<string, []>("op_5242_cast_fp16")];
+            tensor<string, []> var_5244_equation_0 = const()[name = tensor<string, []>("op_5244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5244_cast_fp16 = einsum(equation = var_5244_equation_0, values = (var_5172_cast_fp16_3, var_5224_cast_fp16))[name = tensor<string, []>("op_5244_cast_fp16")];
+            tensor<string, []> var_5246_equation_0 = const()[name = tensor<string, []>("op_5246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5246_cast_fp16 = einsum(equation = var_5246_equation_0, values = (var_5172_cast_fp16_4, var_5225_cast_fp16))[name = tensor<string, []>("op_5246_cast_fp16")];
+            tensor<string, []> var_5248_equation_0 = const()[name = tensor<string, []>("op_5248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5248_cast_fp16 = einsum(equation = var_5248_equation_0, values = (var_5172_cast_fp16_5, var_5226_cast_fp16))[name = tensor<string, []>("op_5248_cast_fp16")];
+            tensor<string, []> var_5250_equation_0 = const()[name = tensor<string, []>("op_5250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5250_cast_fp16 = einsum(equation = var_5250_equation_0, values = (var_5172_cast_fp16_6, var_5227_cast_fp16))[name = tensor<string, []>("op_5250_cast_fp16")];
+            tensor<string, []> var_5252_equation_0 = const()[name = tensor<string, []>("op_5252_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5252_cast_fp16 = einsum(equation = var_5252_equation_0, values = (var_5172_cast_fp16_7, var_5228_cast_fp16))[name = tensor<string, []>("op_5252_cast_fp16")];
+            tensor<string, []> var_5254_equation_0 = const()[name = tensor<string, []>("op_5254_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5254_cast_fp16 = einsum(equation = var_5254_equation_0, values = (var_5172_cast_fp16_8, var_5229_cast_fp16))[name = tensor<string, []>("op_5254_cast_fp16")];
+            tensor<string, []> var_5256_equation_0 = const()[name = tensor<string, []>("op_5256_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5256_cast_fp16 = einsum(equation = var_5256_equation_0, values = (var_5172_cast_fp16_9, var_5230_cast_fp16))[name = tensor<string, []>("op_5256_cast_fp16")];
+            tensor<string, []> var_5258_equation_0 = const()[name = tensor<string, []>("op_5258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5258_cast_fp16 = einsum(equation = var_5258_equation_0, values = (var_5172_cast_fp16_10, var_5231_cast_fp16))[name = tensor<string, []>("op_5258_cast_fp16")];
+            tensor<string, []> var_5260_equation_0 = const()[name = tensor<string, []>("op_5260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5260_cast_fp16 = einsum(equation = var_5260_equation_0, values = (var_5172_cast_fp16_11, var_5232_cast_fp16))[name = tensor<string, []>("op_5260_cast_fp16")];
+            tensor<string, []> var_5262_equation_0 = const()[name = tensor<string, []>("op_5262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5262_cast_fp16 = einsum(equation = var_5262_equation_0, values = (var_5172_cast_fp16_12, var_5233_cast_fp16))[name = tensor<string, []>("op_5262_cast_fp16")];
+            tensor<string, []> var_5264_equation_0 = const()[name = tensor<string, []>("op_5264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5264_cast_fp16 = einsum(equation = var_5264_equation_0, values = (var_5172_cast_fp16_13, var_5234_cast_fp16))[name = tensor<string, []>("op_5264_cast_fp16")];
+            tensor<string, []> var_5266_equation_0 = const()[name = tensor<string, []>("op_5266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5266_cast_fp16 = einsum(equation = var_5266_equation_0, values = (var_5172_cast_fp16_14, var_5235_cast_fp16))[name = tensor<string, []>("op_5266_cast_fp16")];
+            tensor<string, []> var_5268_equation_0 = const()[name = tensor<string, []>("op_5268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5268_cast_fp16 = einsum(equation = var_5268_equation_0, values = (var_5172_cast_fp16_15, var_5236_cast_fp16))[name = tensor<string, []>("op_5268_cast_fp16")];
+            tensor<bool, []> input_215_interleave_0 = const()[name = tensor<string, []>("input_215_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_215_cast_fp16 = concat(axis = var_5085, interleave = input_215_interleave_0, values = (var_5238_cast_fp16, var_5240_cast_fp16, var_5242_cast_fp16, var_5244_cast_fp16, var_5246_cast_fp16, var_5248_cast_fp16, var_5250_cast_fp16, var_5252_cast_fp16, var_5254_cast_fp16, var_5256_cast_fp16, var_5258_cast_fp16, var_5260_cast_fp16, var_5262_cast_fp16, var_5264_cast_fp16, var_5266_cast_fp16, var_5268_cast_fp16))[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<string, []> var_5277_pad_type_0 = const()[name = tensor<string, []>("op_5277_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5277_strides_0 = const()[name = tensor<string, []>("op_5277_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5277_pad_0 = const()[name = tensor<string, []>("op_5277_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5277_dilations_0 = const()[name = tensor<string, []>("op_5277_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5277_groups_0 = const()[name = tensor<string, []>("op_5277_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_21_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(545178112)))];
+            tensor<fp16, [1024]> blocks_21_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547275328)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5277_cast_fp16 = conv(bias = blocks_21_attn_out_bias_to_fp16, dilations = var_5277_dilations_0, groups = var_5277_groups_0, pad = var_5277_pad_0, pad_type = var_5277_pad_type_0, strides = var_5277_strides_0, weight = blocks_21_attn_out_weight_to_fp16, x = input_215_cast_fp16)[name = tensor<string, []>("op_5277_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = var_5277_cast_fp16)[name = tensor<string, []>("inputs_87_cast_fp16")];
+            tensor<int32, [1]> input_217_axes_0 = const()[name = tensor<string, []>("input_217_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_217_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_217_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547277440)))];
+            tensor<fp16, [1024]> input_217_beta_0_to_fp16 = const()[name = tensor<string, []>("input_217_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547279552)))];
+            tensor<fp16, []> var_5287_to_fp16 = const()[name = tensor<string, []>("op_5287_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_217_cast_fp16 = layer_norm(axes = input_217_axes_0, beta = input_217_beta_0_to_fp16, epsilon = var_5287_to_fp16, gamma = input_217_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<string, []> input_219_pad_type_0 = const()[name = tensor<string, []>("input_219_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_219_strides_0 = const()[name = tensor<string, []>("input_219_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_219_pad_0 = const()[name = tensor<string, []>("input_219_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_219_dilations_0 = const()[name = tensor<string, []>("input_219_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_219_groups_0 = const()[name = tensor<string, []>("input_219_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_21_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(547281664)))];
+            tensor<fp16, [4096]> blocks_21_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(555670336)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_219_cast_fp16 = conv(bias = blocks_21_mlp_0_bias_to_fp16, dilations = input_219_dilations_0, groups = input_219_groups_0, pad = input_219_pad_0, pad_type = input_219_pad_type_0, strides = input_219_strides_0, weight = blocks_21_mlp_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor<string, []>("input_219_cast_fp16")];
+            tensor<string, []> input_221_mode_0 = const()[name = tensor<string, []>("input_221_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_221_cast_fp16 = gelu(mode = input_221_mode_0, x = input_219_cast_fp16)[name = tensor<string, []>("input_221_cast_fp16")];
+            tensor<string, []> var_5313_pad_type_0 = const()[name = tensor<string, []>("op_5313_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5313_strides_0 = const()[name = tensor<string, []>("op_5313_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5313_pad_0 = const()[name = tensor<string, []>("op_5313_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5313_dilations_0 = const()[name = tensor<string, []>("op_5313_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5313_groups_0 = const()[name = tensor<string, []>("op_5313_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_21_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(555678592)))];
+            tensor<fp16, [1024]> blocks_21_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_21_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564067264)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5313_cast_fp16 = conv(bias = blocks_21_mlp_2_bias_to_fp16, dilations = var_5313_dilations_0, groups = var_5313_groups_0, pad = var_5313_pad_0, pad_type = var_5313_pad_type_0, strides = var_5313_strides_0, weight = blocks_21_mlp_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor<string, []>("op_5313_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = var_5313_cast_fp16)[name = tensor<string, []>("inputs_89_cast_fp16")];
+            tensor<int32, []> var_5322 = const()[name = tensor<string, []>("op_5322"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_223_axes_0 = const()[name = tensor<string, []>("input_223_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_223_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_223_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564069376)))];
+            tensor<fp16, [1024]> input_223_beta_0_to_fp16 = const()[name = tensor<string, []>("input_223_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564071488)))];
+            tensor<fp16, []> var_5338_to_fp16 = const()[name = tensor<string, []>("op_5338_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_223_cast_fp16 = layer_norm(axes = input_223_axes_0, beta = input_223_beta_0_to_fp16, epsilon = var_5338_to_fp16, gamma = input_223_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor<string, []>("input_223_cast_fp16")];
+            tensor<string, []> q_45_pad_type_0 = const()[name = tensor<string, []>("q_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_45_strides_0 = const()[name = tensor<string, []>("q_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_45_pad_0 = const()[name = tensor<string, []>("q_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_45_dilations_0 = const()[name = tensor<string, []>("q_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_45_groups_0 = const()[name = tensor<string, []>("q_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5373_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5373_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(564073600)))];
+            tensor<fp16, [1024]> var_5373_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5373_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(566170816)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5373_cast_fp16 = conv(bias = var_5373_bias_0_to_fp16, dilations = q_45_dilations_0, groups = q_45_groups_0, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = q_45_strides_0, weight = var_5373_weight_0_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_5373_cast_fp16")];
+            tensor<string, []> k_45_pad_type_0 = const()[name = tensor<string, []>("k_45_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_45_strides_0 = const()[name = tensor<string, []>("k_45_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_45_pad_0 = const()[name = tensor<string, []>("k_45_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_45_dilations_0 = const()[name = tensor<string, []>("k_45_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_45_groups_0 = const()[name = tensor<string, []>("k_45_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(566172928)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_45_cast_fp16 = conv(dilations = k_45_dilations_0, groups = k_45_groups_0, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = k_45_strides_0, weight = blocks_22_attn_key_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("k_45_cast_fp16")];
+            tensor<string, []> var_5371_pad_type_0 = const()[name = tensor<string, []>("op_5371_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5371_strides_0 = const()[name = tensor<string, []>("op_5371_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5371_pad_0 = const()[name = tensor<string, []>("op_5371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5371_dilations_0 = const()[name = tensor<string, []>("op_5371_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5371_groups_0 = const()[name = tensor<string, []>("op_5371_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(568270144)))];
+            tensor<fp16, [1024]> blocks_22_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(570367360)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5371_cast_fp16 = conv(bias = blocks_22_attn_value_bias_to_fp16, dilations = var_5371_dilations_0, groups = var_5371_groups_0, pad = var_5371_pad_0, pad_type = var_5371_pad_type_0, strides = var_5371_strides_0, weight = blocks_22_attn_value_weight_to_fp16, x = input_223_cast_fp16)[name = tensor<string, []>("op_5371_cast_fp16")];
+            tensor<int32, [16]> tile_66 = const()[name = tensor<string, []>("tile_66"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5374_axis_0 = const()[name = tensor<string, []>("op_5374_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5374_cast_fp16_15 = split(axis = var_5374_axis_0, split_sizes = tile_66, x = var_5373_cast_fp16)[name = tensor<string, []>("op_5374_cast_fp16")];
+            tensor<int32, [4]> var_5391_perm_0 = const()[name = tensor<string, []>("op_5391_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_67 = const()[name = tensor<string, []>("tile_67"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5392_axis_0 = const()[name = tensor<string, []>("op_5392_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5391_cast_fp16 = transpose(perm = var_5391_perm_0, x = k_45_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5392_cast_fp16_15 = split(axis = var_5392_axis_0, split_sizes = tile_67, x = var_5391_cast_fp16)[name = tensor<string, []>("op_5392_cast_fp16")];
+            tensor<int32, [16]> tile_68 = const()[name = tensor<string, []>("tile_68"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5409_axis_0 = const()[name = tensor<string, []>("op_5409_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5409_cast_fp16_15 = split(axis = var_5409_axis_0, split_sizes = tile_68, x = var_5371_cast_fp16)[name = tensor<string, []>("op_5409_cast_fp16")];
+            tensor<string, []> aw_705_equation_0 = const()[name = tensor<string, []>("aw_705_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_705_cast_fp16 = einsum(equation = aw_705_equation_0, values = (var_5392_cast_fp16_0, var_5374_cast_fp16_0))[name = tensor<string, []>("aw_705_cast_fp16")];
+            tensor<string, []> aw_707_equation_0 = const()[name = tensor<string, []>("aw_707_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_707_cast_fp16 = einsum(equation = aw_707_equation_0, values = (var_5392_cast_fp16_1, var_5374_cast_fp16_1))[name = tensor<string, []>("aw_707_cast_fp16")];
+            tensor<string, []> aw_709_equation_0 = const()[name = tensor<string, []>("aw_709_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_709_cast_fp16 = einsum(equation = aw_709_equation_0, values = (var_5392_cast_fp16_2, var_5374_cast_fp16_2))[name = tensor<string, []>("aw_709_cast_fp16")];
+            tensor<string, []> aw_711_equation_0 = const()[name = tensor<string, []>("aw_711_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_711_cast_fp16 = einsum(equation = aw_711_equation_0, values = (var_5392_cast_fp16_3, var_5374_cast_fp16_3))[name = tensor<string, []>("aw_711_cast_fp16")];
+            tensor<string, []> aw_713_equation_0 = const()[name = tensor<string, []>("aw_713_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_713_cast_fp16 = einsum(equation = aw_713_equation_0, values = (var_5392_cast_fp16_4, var_5374_cast_fp16_4))[name = tensor<string, []>("aw_713_cast_fp16")];
+            tensor<string, []> aw_715_equation_0 = const()[name = tensor<string, []>("aw_715_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_715_cast_fp16 = einsum(equation = aw_715_equation_0, values = (var_5392_cast_fp16_5, var_5374_cast_fp16_5))[name = tensor<string, []>("aw_715_cast_fp16")];
+            tensor<string, []> aw_717_equation_0 = const()[name = tensor<string, []>("aw_717_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_717_cast_fp16 = einsum(equation = aw_717_equation_0, values = (var_5392_cast_fp16_6, var_5374_cast_fp16_6))[name = tensor<string, []>("aw_717_cast_fp16")];
+            tensor<string, []> aw_719_equation_0 = const()[name = tensor<string, []>("aw_719_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_719_cast_fp16 = einsum(equation = aw_719_equation_0, values = (var_5392_cast_fp16_7, var_5374_cast_fp16_7))[name = tensor<string, []>("aw_719_cast_fp16")];
+            tensor<string, []> aw_721_equation_0 = const()[name = tensor<string, []>("aw_721_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_721_cast_fp16 = einsum(equation = aw_721_equation_0, values = (var_5392_cast_fp16_8, var_5374_cast_fp16_8))[name = tensor<string, []>("aw_721_cast_fp16")];
+            tensor<string, []> aw_723_equation_0 = const()[name = tensor<string, []>("aw_723_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_723_cast_fp16 = einsum(equation = aw_723_equation_0, values = (var_5392_cast_fp16_9, var_5374_cast_fp16_9))[name = tensor<string, []>("aw_723_cast_fp16")];
+            tensor<string, []> aw_725_equation_0 = const()[name = tensor<string, []>("aw_725_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_725_cast_fp16 = einsum(equation = aw_725_equation_0, values = (var_5392_cast_fp16_10, var_5374_cast_fp16_10))[name = tensor<string, []>("aw_725_cast_fp16")];
+            tensor<string, []> aw_727_equation_0 = const()[name = tensor<string, []>("aw_727_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_727_cast_fp16 = einsum(equation = aw_727_equation_0, values = (var_5392_cast_fp16_11, var_5374_cast_fp16_11))[name = tensor<string, []>("aw_727_cast_fp16")];
+            tensor<string, []> aw_729_equation_0 = const()[name = tensor<string, []>("aw_729_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_729_cast_fp16 = einsum(equation = aw_729_equation_0, values = (var_5392_cast_fp16_12, var_5374_cast_fp16_12))[name = tensor<string, []>("aw_729_cast_fp16")];
+            tensor<string, []> aw_731_equation_0 = const()[name = tensor<string, []>("aw_731_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_731_cast_fp16 = einsum(equation = aw_731_equation_0, values = (var_5392_cast_fp16_13, var_5374_cast_fp16_13))[name = tensor<string, []>("aw_731_cast_fp16")];
+            tensor<string, []> aw_733_equation_0 = const()[name = tensor<string, []>("aw_733_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_733_cast_fp16 = einsum(equation = aw_733_equation_0, values = (var_5392_cast_fp16_14, var_5374_cast_fp16_14))[name = tensor<string, []>("aw_733_cast_fp16")];
+            tensor<string, []> aw_735_equation_0 = const()[name = tensor<string, []>("aw_735_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_735_cast_fp16 = einsum(equation = aw_735_equation_0, values = (var_5392_cast_fp16_15, var_5374_cast_fp16_15))[name = tensor<string, []>("aw_735_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5458_cast_fp16 = softmax(axis = var_5322, x = aw_705_cast_fp16)[name = tensor<string, []>("op_5458_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5459_cast_fp16 = softmax(axis = var_5322, x = aw_707_cast_fp16)[name = tensor<string, []>("op_5459_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5460_cast_fp16 = softmax(axis = var_5322, x = aw_709_cast_fp16)[name = tensor<string, []>("op_5460_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5461_cast_fp16 = softmax(axis = var_5322, x = aw_711_cast_fp16)[name = tensor<string, []>("op_5461_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5462_cast_fp16 = softmax(axis = var_5322, x = aw_713_cast_fp16)[name = tensor<string, []>("op_5462_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5463_cast_fp16 = softmax(axis = var_5322, x = aw_715_cast_fp16)[name = tensor<string, []>("op_5463_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5464_cast_fp16 = softmax(axis = var_5322, x = aw_717_cast_fp16)[name = tensor<string, []>("op_5464_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5465_cast_fp16 = softmax(axis = var_5322, x = aw_719_cast_fp16)[name = tensor<string, []>("op_5465_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5466_cast_fp16 = softmax(axis = var_5322, x = aw_721_cast_fp16)[name = tensor<string, []>("op_5466_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5467_cast_fp16 = softmax(axis = var_5322, x = aw_723_cast_fp16)[name = tensor<string, []>("op_5467_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5468_cast_fp16 = softmax(axis = var_5322, x = aw_725_cast_fp16)[name = tensor<string, []>("op_5468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5469_cast_fp16 = softmax(axis = var_5322, x = aw_727_cast_fp16)[name = tensor<string, []>("op_5469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5470_cast_fp16 = softmax(axis = var_5322, x = aw_729_cast_fp16)[name = tensor<string, []>("op_5470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5471_cast_fp16 = softmax(axis = var_5322, x = aw_731_cast_fp16)[name = tensor<string, []>("op_5471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5472_cast_fp16 = softmax(axis = var_5322, x = aw_733_cast_fp16)[name = tensor<string, []>("op_5472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5473_cast_fp16 = softmax(axis = var_5322, x = aw_735_cast_fp16)[name = tensor<string, []>("op_5473_cast_fp16")];
+            tensor<string, []> var_5475_equation_0 = const()[name = tensor<string, []>("op_5475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5475_cast_fp16 = einsum(equation = var_5475_equation_0, values = (var_5409_cast_fp16_0, var_5458_cast_fp16))[name = tensor<string, []>("op_5475_cast_fp16")];
+            tensor<string, []> var_5477_equation_0 = const()[name = tensor<string, []>("op_5477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5477_cast_fp16 = einsum(equation = var_5477_equation_0, values = (var_5409_cast_fp16_1, var_5459_cast_fp16))[name = tensor<string, []>("op_5477_cast_fp16")];
+            tensor<string, []> var_5479_equation_0 = const()[name = tensor<string, []>("op_5479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5479_cast_fp16 = einsum(equation = var_5479_equation_0, values = (var_5409_cast_fp16_2, var_5460_cast_fp16))[name = tensor<string, []>("op_5479_cast_fp16")];
+            tensor<string, []> var_5481_equation_0 = const()[name = tensor<string, []>("op_5481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5481_cast_fp16 = einsum(equation = var_5481_equation_0, values = (var_5409_cast_fp16_3, var_5461_cast_fp16))[name = tensor<string, []>("op_5481_cast_fp16")];
+            tensor<string, []> var_5483_equation_0 = const()[name = tensor<string, []>("op_5483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5483_cast_fp16 = einsum(equation = var_5483_equation_0, values = (var_5409_cast_fp16_4, var_5462_cast_fp16))[name = tensor<string, []>("op_5483_cast_fp16")];
+            tensor<string, []> var_5485_equation_0 = const()[name = tensor<string, []>("op_5485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5485_cast_fp16 = einsum(equation = var_5485_equation_0, values = (var_5409_cast_fp16_5, var_5463_cast_fp16))[name = tensor<string, []>("op_5485_cast_fp16")];
+            tensor<string, []> var_5487_equation_0 = const()[name = tensor<string, []>("op_5487_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5487_cast_fp16 = einsum(equation = var_5487_equation_0, values = (var_5409_cast_fp16_6, var_5464_cast_fp16))[name = tensor<string, []>("op_5487_cast_fp16")];
+            tensor<string, []> var_5489_equation_0 = const()[name = tensor<string, []>("op_5489_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5489_cast_fp16 = einsum(equation = var_5489_equation_0, values = (var_5409_cast_fp16_7, var_5465_cast_fp16))[name = tensor<string, []>("op_5489_cast_fp16")];
+            tensor<string, []> var_5491_equation_0 = const()[name = tensor<string, []>("op_5491_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5491_cast_fp16 = einsum(equation = var_5491_equation_0, values = (var_5409_cast_fp16_8, var_5466_cast_fp16))[name = tensor<string, []>("op_5491_cast_fp16")];
+            tensor<string, []> var_5493_equation_0 = const()[name = tensor<string, []>("op_5493_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5493_cast_fp16 = einsum(equation = var_5493_equation_0, values = (var_5409_cast_fp16_9, var_5467_cast_fp16))[name = tensor<string, []>("op_5493_cast_fp16")];
+            tensor<string, []> var_5495_equation_0 = const()[name = tensor<string, []>("op_5495_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5495_cast_fp16 = einsum(equation = var_5495_equation_0, values = (var_5409_cast_fp16_10, var_5468_cast_fp16))[name = tensor<string, []>("op_5495_cast_fp16")];
+            tensor<string, []> var_5497_equation_0 = const()[name = tensor<string, []>("op_5497_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5497_cast_fp16 = einsum(equation = var_5497_equation_0, values = (var_5409_cast_fp16_11, var_5469_cast_fp16))[name = tensor<string, []>("op_5497_cast_fp16")];
+            tensor<string, []> var_5499_equation_0 = const()[name = tensor<string, []>("op_5499_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5499_cast_fp16 = einsum(equation = var_5499_equation_0, values = (var_5409_cast_fp16_12, var_5470_cast_fp16))[name = tensor<string, []>("op_5499_cast_fp16")];
+            tensor<string, []> var_5501_equation_0 = const()[name = tensor<string, []>("op_5501_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5501_cast_fp16 = einsum(equation = var_5501_equation_0, values = (var_5409_cast_fp16_13, var_5471_cast_fp16))[name = tensor<string, []>("op_5501_cast_fp16")];
+            tensor<string, []> var_5503_equation_0 = const()[name = tensor<string, []>("op_5503_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5503_cast_fp16 = einsum(equation = var_5503_equation_0, values = (var_5409_cast_fp16_14, var_5472_cast_fp16))[name = tensor<string, []>("op_5503_cast_fp16")];
+            tensor<string, []> var_5505_equation_0 = const()[name = tensor<string, []>("op_5505_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5505_cast_fp16 = einsum(equation = var_5505_equation_0, values = (var_5409_cast_fp16_15, var_5473_cast_fp16))[name = tensor<string, []>("op_5505_cast_fp16")];
+            tensor<bool, []> input_225_interleave_0 = const()[name = tensor<string, []>("input_225_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_225_cast_fp16 = concat(axis = var_5322, interleave = input_225_interleave_0, values = (var_5475_cast_fp16, var_5477_cast_fp16, var_5479_cast_fp16, var_5481_cast_fp16, var_5483_cast_fp16, var_5485_cast_fp16, var_5487_cast_fp16, var_5489_cast_fp16, var_5491_cast_fp16, var_5493_cast_fp16, var_5495_cast_fp16, var_5497_cast_fp16, var_5499_cast_fp16, var_5501_cast_fp16, var_5503_cast_fp16, var_5505_cast_fp16))[name = tensor<string, []>("input_225_cast_fp16")];
+            tensor<string, []> var_5514_pad_type_0 = const()[name = tensor<string, []>("op_5514_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5514_strides_0 = const()[name = tensor<string, []>("op_5514_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5514_pad_0 = const()[name = tensor<string, []>("op_5514_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5514_dilations_0 = const()[name = tensor<string, []>("op_5514_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5514_groups_0 = const()[name = tensor<string, []>("op_5514_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_22_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(570369472)))];
+            tensor<fp16, [1024]> blocks_22_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572466688)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5514_cast_fp16 = conv(bias = blocks_22_attn_out_bias_to_fp16, dilations = var_5514_dilations_0, groups = var_5514_groups_0, pad = var_5514_pad_0, pad_type = var_5514_pad_type_0, strides = var_5514_strides_0, weight = blocks_22_attn_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor<string, []>("op_5514_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = var_5514_cast_fp16)[name = tensor<string, []>("inputs_91_cast_fp16")];
+            tensor<int32, [1]> input_227_axes_0 = const()[name = tensor<string, []>("input_227_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_227_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572468800)))];
+            tensor<fp16, [1024]> input_227_beta_0_to_fp16 = const()[name = tensor<string, []>("input_227_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572470912)))];
+            tensor<fp16, []> var_5524_to_fp16 = const()[name = tensor<string, []>("op_5524_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = input_227_beta_0_to_fp16, epsilon = var_5524_to_fp16, gamma = input_227_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<string, []> input_229_pad_type_0 = const()[name = tensor<string, []>("input_229_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_229_strides_0 = const()[name = tensor<string, []>("input_229_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_229_pad_0 = const()[name = tensor<string, []>("input_229_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_229_dilations_0 = const()[name = tensor<string, []>("input_229_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_229_groups_0 = const()[name = tensor<string, []>("input_229_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_22_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(572473024)))];
+            tensor<fp16, [4096]> blocks_22_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(580861696)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_229_cast_fp16 = conv(bias = blocks_22_mlp_0_bias_to_fp16, dilations = input_229_dilations_0, groups = input_229_groups_0, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = input_229_strides_0, weight = blocks_22_mlp_0_weight_to_fp16, x = input_227_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<string, []> input_231_mode_0 = const()[name = tensor<string, []>("input_231_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<string, []> var_5550_pad_type_0 = const()[name = tensor<string, []>("op_5550_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5550_strides_0 = const()[name = tensor<string, []>("op_5550_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5550_pad_0 = const()[name = tensor<string, []>("op_5550_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5550_dilations_0 = const()[name = tensor<string, []>("op_5550_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5550_groups_0 = const()[name = tensor<string, []>("op_5550_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_22_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(580869952)))];
+            tensor<fp16, [1024]> blocks_22_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_22_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589258624)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5550_cast_fp16 = conv(bias = blocks_22_mlp_2_bias_to_fp16, dilations = var_5550_dilations_0, groups = var_5550_groups_0, pad = var_5550_pad_0, pad_type = var_5550_pad_type_0, strides = var_5550_strides_0, weight = blocks_22_mlp_2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor<string, []>("op_5550_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = var_5550_cast_fp16)[name = tensor<string, []>("inputs_93_cast_fp16")];
+            tensor<int32, []> var_5559 = const()[name = tensor<string, []>("op_5559"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_233_axes_0 = const()[name = tensor<string, []>("input_233_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_233_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_233_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589260736)))];
+            tensor<fp16, [1024]> input_233_beta_0_to_fp16 = const()[name = tensor<string, []>("input_233_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589262848)))];
+            tensor<fp16, []> var_5575_to_fp16 = const()[name = tensor<string, []>("op_5575_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_233_cast_fp16 = layer_norm(axes = input_233_axes_0, beta = input_233_beta_0_to_fp16, epsilon = var_5575_to_fp16, gamma = input_233_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> var_5610_weight_0_to_fp16 = const()[name = tensor<string, []>("op_5610_weight_0_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(589264960)))];
+            tensor<fp16, [1024]> var_5610_bias_0_to_fp16 = const()[name = tensor<string, []>("op_5610_bias_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591362176)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5610_cast_fp16 = conv(bias = var_5610_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_5610_weight_0_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_5610_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_key_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(591364288)))];
+            tensor<fp16, [1, 1024, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_23_attn_key_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_5608_pad_type_0 = const()[name = tensor<string, []>("op_5608_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5608_strides_0 = const()[name = tensor<string, []>("op_5608_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5608_pad_0 = const()[name = tensor<string, []>("op_5608_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5608_dilations_0 = const()[name = tensor<string, []>("op_5608_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5608_groups_0 = const()[name = tensor<string, []>("op_5608_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(593461504)))];
+            tensor<fp16, [1024]> blocks_23_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_value_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(595558720)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5608_cast_fp16 = conv(bias = blocks_23_attn_value_bias_to_fp16, dilations = var_5608_dilations_0, groups = var_5608_groups_0, pad = var_5608_pad_0, pad_type = var_5608_pad_type_0, strides = var_5608_strides_0, weight = blocks_23_attn_value_weight_to_fp16, x = input_233_cast_fp16)[name = tensor<string, []>("op_5608_cast_fp16")];
+            tensor<int32, [16]> tile_69 = const()[name = tensor<string, []>("tile_69"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5611_axis_0 = const()[name = tensor<string, []>("op_5611_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5611_cast_fp16_15 = split(axis = var_5611_axis_0, split_sizes = tile_69, x = var_5610_cast_fp16)[name = tensor<string, []>("op_5611_cast_fp16")];
+            tensor<int32, [4]> var_5628_perm_0 = const()[name = tensor<string, []>("op_5628_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [16]> tile_70 = const()[name = tensor<string, []>("tile_70"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5629_axis_0 = const()[name = tensor<string, []>("op_5629_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 1024]> var_5628_cast_fp16 = transpose(perm = var_5628_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_11, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_12, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_13, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_14, tensor<fp16, [1, 1500, 1, 64]> var_5629_cast_fp16_15 = split(axis = var_5629_axis_0, split_sizes = tile_70, x = var_5628_cast_fp16)[name = tensor<string, []>("op_5629_cast_fp16")];
+            tensor<int32, [16]> tile_71 = const()[name = tensor<string, []>("tile_71"), val = tensor<int32, [16]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_5646_axis_0 = const()[name = tensor<string, []>("op_5646_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_11, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_12, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_13, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_14, tensor<fp16, [1, 64, 1, 1500]> var_5646_cast_fp16_15 = split(axis = var_5646_axis_0, split_sizes = tile_71, x = var_5608_cast_fp16)[name = tensor<string, []>("op_5646_cast_fp16")];
+            tensor<string, []> aw_737_equation_0 = const()[name = tensor<string, []>("aw_737_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_737_cast_fp16 = einsum(equation = aw_737_equation_0, values = (var_5629_cast_fp16_0, var_5611_cast_fp16_0))[name = tensor<string, []>("aw_737_cast_fp16")];
+            tensor<string, []> aw_739_equation_0 = const()[name = tensor<string, []>("aw_739_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_739_cast_fp16 = einsum(equation = aw_739_equation_0, values = (var_5629_cast_fp16_1, var_5611_cast_fp16_1))[name = tensor<string, []>("aw_739_cast_fp16")];
+            tensor<string, []> aw_741_equation_0 = const()[name = tensor<string, []>("aw_741_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_741_cast_fp16 = einsum(equation = aw_741_equation_0, values = (var_5629_cast_fp16_2, var_5611_cast_fp16_2))[name = tensor<string, []>("aw_741_cast_fp16")];
+            tensor<string, []> aw_743_equation_0 = const()[name = tensor<string, []>("aw_743_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_743_cast_fp16 = einsum(equation = aw_743_equation_0, values = (var_5629_cast_fp16_3, var_5611_cast_fp16_3))[name = tensor<string, []>("aw_743_cast_fp16")];
+            tensor<string, []> aw_745_equation_0 = const()[name = tensor<string, []>("aw_745_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_745_cast_fp16 = einsum(equation = aw_745_equation_0, values = (var_5629_cast_fp16_4, var_5611_cast_fp16_4))[name = tensor<string, []>("aw_745_cast_fp16")];
+            tensor<string, []> aw_747_equation_0 = const()[name = tensor<string, []>("aw_747_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_747_cast_fp16 = einsum(equation = aw_747_equation_0, values = (var_5629_cast_fp16_5, var_5611_cast_fp16_5))[name = tensor<string, []>("aw_747_cast_fp16")];
+            tensor<string, []> aw_749_equation_0 = const()[name = tensor<string, []>("aw_749_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_749_cast_fp16 = einsum(equation = aw_749_equation_0, values = (var_5629_cast_fp16_6, var_5611_cast_fp16_6))[name = tensor<string, []>("aw_749_cast_fp16")];
+            tensor<string, []> aw_751_equation_0 = const()[name = tensor<string, []>("aw_751_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_751_cast_fp16 = einsum(equation = aw_751_equation_0, values = (var_5629_cast_fp16_7, var_5611_cast_fp16_7))[name = tensor<string, []>("aw_751_cast_fp16")];
+            tensor<string, []> aw_753_equation_0 = const()[name = tensor<string, []>("aw_753_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_753_cast_fp16 = einsum(equation = aw_753_equation_0, values = (var_5629_cast_fp16_8, var_5611_cast_fp16_8))[name = tensor<string, []>("aw_753_cast_fp16")];
+            tensor<string, []> aw_755_equation_0 = const()[name = tensor<string, []>("aw_755_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_755_cast_fp16 = einsum(equation = aw_755_equation_0, values = (var_5629_cast_fp16_9, var_5611_cast_fp16_9))[name = tensor<string, []>("aw_755_cast_fp16")];
+            tensor<string, []> aw_757_equation_0 = const()[name = tensor<string, []>("aw_757_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_757_cast_fp16 = einsum(equation = aw_757_equation_0, values = (var_5629_cast_fp16_10, var_5611_cast_fp16_10))[name = tensor<string, []>("aw_757_cast_fp16")];
+            tensor<string, []> aw_759_equation_0 = const()[name = tensor<string, []>("aw_759_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_759_cast_fp16 = einsum(equation = aw_759_equation_0, values = (var_5629_cast_fp16_11, var_5611_cast_fp16_11))[name = tensor<string, []>("aw_759_cast_fp16")];
+            tensor<string, []> aw_761_equation_0 = const()[name = tensor<string, []>("aw_761_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_761_cast_fp16 = einsum(equation = aw_761_equation_0, values = (var_5629_cast_fp16_12, var_5611_cast_fp16_12))[name = tensor<string, []>("aw_761_cast_fp16")];
+            tensor<string, []> aw_763_equation_0 = const()[name = tensor<string, []>("aw_763_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_763_cast_fp16 = einsum(equation = aw_763_equation_0, values = (var_5629_cast_fp16_13, var_5611_cast_fp16_13))[name = tensor<string, []>("aw_763_cast_fp16")];
+            tensor<string, []> aw_765_equation_0 = const()[name = tensor<string, []>("aw_765_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_765_cast_fp16 = einsum(equation = aw_765_equation_0, values = (var_5629_cast_fp16_14, var_5611_cast_fp16_14))[name = tensor<string, []>("aw_765_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_5629_cast_fp16_15, var_5611_cast_fp16_15))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5695_cast_fp16 = softmax(axis = var_5559, x = aw_737_cast_fp16)[name = tensor<string, []>("op_5695_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5696_cast_fp16 = softmax(axis = var_5559, x = aw_739_cast_fp16)[name = tensor<string, []>("op_5696_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5697_cast_fp16 = softmax(axis = var_5559, x = aw_741_cast_fp16)[name = tensor<string, []>("op_5697_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5698_cast_fp16 = softmax(axis = var_5559, x = aw_743_cast_fp16)[name = tensor<string, []>("op_5698_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5699_cast_fp16 = softmax(axis = var_5559, x = aw_745_cast_fp16)[name = tensor<string, []>("op_5699_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5700_cast_fp16 = softmax(axis = var_5559, x = aw_747_cast_fp16)[name = tensor<string, []>("op_5700_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5701_cast_fp16 = softmax(axis = var_5559, x = aw_749_cast_fp16)[name = tensor<string, []>("op_5701_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5702_cast_fp16 = softmax(axis = var_5559, x = aw_751_cast_fp16)[name = tensor<string, []>("op_5702_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5703_cast_fp16 = softmax(axis = var_5559, x = aw_753_cast_fp16)[name = tensor<string, []>("op_5703_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5704_cast_fp16 = softmax(axis = var_5559, x = aw_755_cast_fp16)[name = tensor<string, []>("op_5704_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5705_cast_fp16 = softmax(axis = var_5559, x = aw_757_cast_fp16)[name = tensor<string, []>("op_5705_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5706_cast_fp16 = softmax(axis = var_5559, x = aw_759_cast_fp16)[name = tensor<string, []>("op_5706_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5707_cast_fp16 = softmax(axis = var_5559, x = aw_761_cast_fp16)[name = tensor<string, []>("op_5707_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5708_cast_fp16 = softmax(axis = var_5559, x = aw_763_cast_fp16)[name = tensor<string, []>("op_5708_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5709_cast_fp16 = softmax(axis = var_5559, x = aw_765_cast_fp16)[name = tensor<string, []>("op_5709_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_5710_cast_fp16 = softmax(axis = var_5559, x = aw_cast_fp16)[name = tensor<string, []>("op_5710_cast_fp16")];
+            tensor<string, []> var_5712_equation_0 = const()[name = tensor<string, []>("op_5712_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5712_cast_fp16 = einsum(equation = var_5712_equation_0, values = (var_5646_cast_fp16_0, var_5695_cast_fp16))[name = tensor<string, []>("op_5712_cast_fp16")];
+            tensor<string, []> var_5714_equation_0 = const()[name = tensor<string, []>("op_5714_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5714_cast_fp16 = einsum(equation = var_5714_equation_0, values = (var_5646_cast_fp16_1, var_5696_cast_fp16))[name = tensor<string, []>("op_5714_cast_fp16")];
+            tensor<string, []> var_5716_equation_0 = const()[name = tensor<string, []>("op_5716_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5716_cast_fp16 = einsum(equation = var_5716_equation_0, values = (var_5646_cast_fp16_2, var_5697_cast_fp16))[name = tensor<string, []>("op_5716_cast_fp16")];
+            tensor<string, []> var_5718_equation_0 = const()[name = tensor<string, []>("op_5718_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5718_cast_fp16 = einsum(equation = var_5718_equation_0, values = (var_5646_cast_fp16_3, var_5698_cast_fp16))[name = tensor<string, []>("op_5718_cast_fp16")];
+            tensor<string, []> var_5720_equation_0 = const()[name = tensor<string, []>("op_5720_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5720_cast_fp16 = einsum(equation = var_5720_equation_0, values = (var_5646_cast_fp16_4, var_5699_cast_fp16))[name = tensor<string, []>("op_5720_cast_fp16")];
+            tensor<string, []> var_5722_equation_0 = const()[name = tensor<string, []>("op_5722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5722_cast_fp16 = einsum(equation = var_5722_equation_0, values = (var_5646_cast_fp16_5, var_5700_cast_fp16))[name = tensor<string, []>("op_5722_cast_fp16")];
+            tensor<string, []> var_5724_equation_0 = const()[name = tensor<string, []>("op_5724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5724_cast_fp16 = einsum(equation = var_5724_equation_0, values = (var_5646_cast_fp16_6, var_5701_cast_fp16))[name = tensor<string, []>("op_5724_cast_fp16")];
+            tensor<string, []> var_5726_equation_0 = const()[name = tensor<string, []>("op_5726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5726_cast_fp16 = einsum(equation = var_5726_equation_0, values = (var_5646_cast_fp16_7, var_5702_cast_fp16))[name = tensor<string, []>("op_5726_cast_fp16")];
+            tensor<string, []> var_5728_equation_0 = const()[name = tensor<string, []>("op_5728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5728_cast_fp16 = einsum(equation = var_5728_equation_0, values = (var_5646_cast_fp16_8, var_5703_cast_fp16))[name = tensor<string, []>("op_5728_cast_fp16")];
+            tensor<string, []> var_5730_equation_0 = const()[name = tensor<string, []>("op_5730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5730_cast_fp16 = einsum(equation = var_5730_equation_0, values = (var_5646_cast_fp16_9, var_5704_cast_fp16))[name = tensor<string, []>("op_5730_cast_fp16")];
+            tensor<string, []> var_5732_equation_0 = const()[name = tensor<string, []>("op_5732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5732_cast_fp16 = einsum(equation = var_5732_equation_0, values = (var_5646_cast_fp16_10, var_5705_cast_fp16))[name = tensor<string, []>("op_5732_cast_fp16")];
+            tensor<string, []> var_5734_equation_0 = const()[name = tensor<string, []>("op_5734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5734_cast_fp16 = einsum(equation = var_5734_equation_0, values = (var_5646_cast_fp16_11, var_5706_cast_fp16))[name = tensor<string, []>("op_5734_cast_fp16")];
+            tensor<string, []> var_5736_equation_0 = const()[name = tensor<string, []>("op_5736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5736_cast_fp16 = einsum(equation = var_5736_equation_0, values = (var_5646_cast_fp16_12, var_5707_cast_fp16))[name = tensor<string, []>("op_5736_cast_fp16")];
+            tensor<string, []> var_5738_equation_0 = const()[name = tensor<string, []>("op_5738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5738_cast_fp16 = einsum(equation = var_5738_equation_0, values = (var_5646_cast_fp16_13, var_5708_cast_fp16))[name = tensor<string, []>("op_5738_cast_fp16")];
+            tensor<string, []> var_5740_equation_0 = const()[name = tensor<string, []>("op_5740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5740_cast_fp16 = einsum(equation = var_5740_equation_0, values = (var_5646_cast_fp16_14, var_5709_cast_fp16))[name = tensor<string, []>("op_5740_cast_fp16")];
+            tensor<string, []> var_5742_equation_0 = const()[name = tensor<string, []>("op_5742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_5742_cast_fp16 = einsum(equation = var_5742_equation_0, values = (var_5646_cast_fp16_15, var_5710_cast_fp16))[name = tensor<string, []>("op_5742_cast_fp16")];
+            tensor<bool, []> input_235_interleave_0 = const()[name = tensor<string, []>("input_235_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_235_cast_fp16 = concat(axis = var_5559, interleave = input_235_interleave_0, values = (var_5712_cast_fp16, var_5714_cast_fp16, var_5716_cast_fp16, var_5718_cast_fp16, var_5720_cast_fp16, var_5722_cast_fp16, var_5724_cast_fp16, var_5726_cast_fp16, var_5728_cast_fp16, var_5730_cast_fp16, var_5732_cast_fp16, var_5734_cast_fp16, var_5736_cast_fp16, var_5738_cast_fp16, var_5740_cast_fp16, var_5742_cast_fp16))[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<string, []> var_5751_pad_type_0 = const()[name = tensor<string, []>("op_5751_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5751_strides_0 = const()[name = tensor<string, []>("op_5751_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5751_pad_0 = const()[name = tensor<string, []>("op_5751_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5751_dilations_0 = const()[name = tensor<string, []>("op_5751_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5751_groups_0 = const()[name = tensor<string, []>("op_5751_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 1024, 1, 1]> blocks_23_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_weight_to_fp16"), val = tensor<fp16, [1024, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(595560832)))];
+            tensor<fp16, [1024]> blocks_23_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_attn_out_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597658048)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5751_cast_fp16 = conv(bias = blocks_23_attn_out_bias_to_fp16, dilations = var_5751_dilations_0, groups = var_5751_groups_0, pad = var_5751_pad_0, pad_type = var_5751_pad_type_0, strides = var_5751_strides_0, weight = blocks_23_attn_out_weight_to_fp16, x = input_235_cast_fp16)[name = tensor<string, []>("op_5751_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = var_5751_cast_fp16)[name = tensor<string, []>("inputs_95_cast_fp16")];
+            tensor<int32, [1]> input_237_axes_0 = const()[name = tensor<string, []>("input_237_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> input_237_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_237_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597660160)))];
+            tensor<fp16, [1024]> input_237_beta_0_to_fp16 = const()[name = tensor<string, []>("input_237_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597662272)))];
+            tensor<fp16, []> var_5761_to_fp16 = const()[name = tensor<string, []>("op_5761_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> input_237_cast_fp16 = layer_norm(axes = input_237_axes_0, beta = input_237_beta_0_to_fp16, epsilon = var_5761_to_fp16, gamma = input_237_gamma_0_to_fp16, x = inputs_95_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<string, []> input_239_pad_type_0 = const()[name = tensor<string, []>("input_239_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_239_strides_0 = const()[name = tensor<string, []>("input_239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_239_pad_0 = const()[name = tensor<string, []>("input_239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_239_dilations_0 = const()[name = tensor<string, []>("input_239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_239_groups_0 = const()[name = tensor<string, []>("input_239_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [4096, 1024, 1, 1]> blocks_23_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_weight_to_fp16"), val = tensor<fp16, [4096, 1024, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(597664384)))];
+            tensor<fp16, [4096]> blocks_23_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_0_bias_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(606053056)))];
+            tensor<fp16, [1, 4096, 1, 1500]> input_239_cast_fp16 = conv(bias = blocks_23_mlp_0_bias_to_fp16, dilations = input_239_dilations_0, groups = input_239_groups_0, pad = input_239_pad_0, pad_type = input_239_pad_type_0, strides = input_239_strides_0, weight = blocks_23_mlp_0_weight_to_fp16, x = input_237_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 4096, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_239_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_5787_pad_type_0 = const()[name = tensor<string, []>("op_5787_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_5787_strides_0 = const()[name = tensor<string, []>("op_5787_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5787_pad_0 = const()[name = tensor<string, []>("op_5787_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5787_dilations_0 = const()[name = tensor<string, []>("op_5787_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_5787_groups_0 = const()[name = tensor<string, []>("op_5787_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1024, 4096, 1, 1]> blocks_23_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_weight_to_fp16"), val = tensor<fp16, [1024, 4096, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(606061312)))];
+            tensor<fp16, [1024]> blocks_23_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_23_mlp_2_bias_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614449984)))];
+            tensor<fp16, [1, 1024, 1, 1500]> var_5787_cast_fp16 = conv(bias = blocks_23_mlp_2_bias_to_fp16, dilations = var_5787_dilations_0, groups = var_5787_groups_0, pad = var_5787_pad_0, pad_type = var_5787_pad_type_0, strides = var_5787_strides_0, weight = blocks_23_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_5787_cast_fp16")];
+            tensor<fp16, [1, 1024, 1, 1500]> inputs_cast_fp16 = add(x = inputs_95_cast_fp16, y = var_5787_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1024]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614452096)))];
+            tensor<fp16, [1024]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(614454208)))];
+            tensor<fp16, []> var_5801_to_fp16 = const()[name = tensor<string, []>("op_5801_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1024, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_5801_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_5812_axes_0 = const()[name = tensor<string, []>("op_5812_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1024, 1500]> var_5812_cast_fp16 = squeeze(axes = var_5812_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_5812_cast_fp16")];
+            tensor<int32, [3]> var_5815_perm_0 = const()[name = tensor<string, []>("op_5815_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_5815_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_5815_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 1024]> var_5815_cast_fp16 = transpose(perm = var_5815_perm_0, x = var_5812_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 1024]> output = cast(dtype = var_5815_cast_fp16_to_fp32_dtype_0, x = var_5815_cast_fp16)[name = tensor<string, []>("cast_99")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/medium/ggml-medium-encoder.mlmodelc/weights/weight.bin b/medium/ggml-medium-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..37209ce1111af094d83ca044d0079af339fc9142
--- /dev/null
+++ b/medium/ggml-medium-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2972798300feedfddbf8f3f898b8492320d538e8ecddebbfdf0d03a82e00b83
+size 614456320
diff --git a/medium/ggml-medium.bin b/medium/ggml-medium.bin
new file mode 100644
index 0000000000000000000000000000000000000000..be775c464024bdcd9e834c2f9ed42aae6b708d2b
--- /dev/null
+++ b/medium/ggml-medium.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208
+size 1533763059
diff --git a/small.en/.DS_Store b/small.en/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3ed91b3729b5e763cfdf15a9dc0a94436869230e
Binary files /dev/null and b/small.en/.DS_Store differ
diff --git a/small.en/ggml-small.en-encoder.mlmodelc/analytics/coremldata.bin b/small.en/ggml-small.en-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b15b26f7f6a3c8a8718cc69c7c4ff81a9990881e
--- /dev/null
+++ b/small.en/ggml-small.en-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea4d5727e7b1c8762c548f4b80a30447740200e986075b056cf3da16359da56
+size 243
diff --git a/small.en/ggml-small.en-encoder.mlmodelc/coremldata.bin b/small.en/ggml-small.en-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2aecfa7c477de9fb63dc789d720d5e2fb0e72d4a
--- /dev/null
+++ b/small.en/ggml-small.en-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a9554f8fae587d40c808e5a6411d79ce20ab6b473d7a3a88df2cd8123978fb4
+size 320
diff --git a/small.en/ggml-small.en-encoder.mlmodelc/metadata.json b/small.en/ggml-small.en-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..63a51cf95f700377fe5aa4a5285ac15eb1d85214
--- /dev/null
+++ b/small.en/ggml-small.en-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 768]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 12,
+      "Gelu" : 14,
+      "LayerNorm" : 25,
+      "Transpose" : 13,
+      "Softmax" : 144,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 25,
+      "Einsum" : 288,
+      "ExpandDims" : 1,
+      "Split" : 36,
+      "Conv" : 74
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.2.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_small_en",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/small.en/ggml-small.en-encoder.mlmodelc/model.mil b/small.en/ggml-small.en-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0c32f029586d9600dccb30b0eeece3bf02323585
--- /dev/null
+++ b/small.en/ggml-small.en-encoder.mlmodelc/model.mil
@@ -0,0 +1,1663 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_44_pad_type_0 = const()[name = tensor<string, []>("op_44_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_44_pad_0 = const()[name = tensor<string, []>("op_44_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_44_strides_0 = const()[name = tensor<string, []>("op_44_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_44_dilations_0 = const()[name = tensor<string, []>("op_44_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_44_groups_0 = const()[name = tensor<string, []>("op_44_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [768, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [768, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [768]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368768)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_52")];
+            tensor<fp16, [1, 768, 3000]> var_44_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_44_dilations_0, groups = var_44_groups_0, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_44_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_44_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_62_pad_type_0 = const()[name = tensor<string, []>("op_62_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_62_pad_0 = const()[name = tensor<string, []>("op_62_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_62_strides_0 = const()[name = tensor<string, []>("op_62_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_62_dilations_0 = const()[name = tensor<string, []>("op_62_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_62_groups_0 = const()[name = tensor<string, []>("op_62_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [768, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370368)))];
+            tensor<fp16, [768]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3909376)))];
+            tensor<fp16, [1, 768, 1500]> var_62_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_62_dilations_0, groups = var_62_groups_0, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_62_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_62_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [768, 1500]> var_67_to_fp16 = const()[name = tensor<string, []>("op_67_to_fp16"), val = tensor<fp16, [768, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3910976)))];
+            tensor<fp16, [1, 768, 1500]> var_69_cast_fp16 = add(x = x_3_cast_fp16, y = var_67_to_fp16)[name = tensor<string, []>("op_69_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_69_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_84 = const()[name = tensor<string, []>("op_84"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6215040)))];
+            tensor<fp16, [768]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6216640)))];
+            tensor<fp16, []> var_100_to_fp16 = const()[name = tensor<string, []>("op_100_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_100_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_135_weight_0_to_fp16 = const()[name = tensor<string, []>("op_135_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6218240)))];
+            tensor<fp16, [768]> var_135_bias_0_to_fp16 = const()[name = tensor<string, []>("op_135_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7397952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_135_cast_fp16 = conv(bias = var_135_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_135_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_135_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7399552)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_133_pad_type_0 = const()[name = tensor<string, []>("op_133_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_133_strides_0 = const()[name = tensor<string, []>("op_133_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_133_pad_0 = const()[name = tensor<string, []>("op_133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_133_dilations_0 = const()[name = tensor<string, []>("op_133_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_133_groups_0 = const()[name = tensor<string, []>("op_133_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8579264)))];
+            tensor<fp16, [768]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9758976)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_133_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_133_dilations_0, groups = var_133_groups_0, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_133_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_133_cast_fp16")];
+            tensor<int32, [12]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_136_axis_0 = const()[name = tensor<string, []>("op_136_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_11 = split(axis = var_136_axis_0, split_sizes = tile_0, x = var_135_cast_fp16)[name = tensor<string, []>("op_136_cast_fp16")];
+            tensor<int32, [4]> var_149_perm_0 = const()[name = tensor<string, []>("op_149_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_150_axis_0 = const()[name = tensor<string, []>("op_150_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_149_cast_fp16 = transpose(perm = var_149_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_11 = split(axis = var_150_axis_0, split_sizes = tile_1, x = var_149_cast_fp16)[name = tensor<string, []>("op_150_cast_fp16")];
+            tensor<int32, [12]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_163_axis_0 = const()[name = tensor<string, []>("op_163_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_11 = split(axis = var_163_axis_0, split_sizes = tile_2, x = var_133_cast_fp16)[name = tensor<string, []>("op_163_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_150_cast_fp16_0, var_136_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_150_cast_fp16_1, var_136_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_150_cast_fp16_2, var_136_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_150_cast_fp16_3, var_136_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_150_cast_fp16_4, var_136_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_150_cast_fp16_5, var_136_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_150_cast_fp16_6, var_136_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_150_cast_fp16_7, var_136_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_150_cast_fp16_8, var_136_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_150_cast_fp16_9, var_136_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_150_cast_fp16_10, var_136_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_150_cast_fp16_11, var_136_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_200_cast_fp16 = softmax(axis = var_84, x = aw_1_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_201_cast_fp16 = softmax(axis = var_84, x = aw_3_cast_fp16)[name = tensor<string, []>("op_201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_202_cast_fp16 = softmax(axis = var_84, x = aw_5_cast_fp16)[name = tensor<string, []>("op_202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_203_cast_fp16 = softmax(axis = var_84, x = aw_7_cast_fp16)[name = tensor<string, []>("op_203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_204_cast_fp16 = softmax(axis = var_84, x = aw_9_cast_fp16)[name = tensor<string, []>("op_204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_205_cast_fp16 = softmax(axis = var_84, x = aw_11_cast_fp16)[name = tensor<string, []>("op_205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_206_cast_fp16 = softmax(axis = var_84, x = aw_13_cast_fp16)[name = tensor<string, []>("op_206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_207_cast_fp16 = softmax(axis = var_84, x = aw_15_cast_fp16)[name = tensor<string, []>("op_207_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_208_cast_fp16 = softmax(axis = var_84, x = aw_17_cast_fp16)[name = tensor<string, []>("op_208_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_209_cast_fp16 = softmax(axis = var_84, x = aw_19_cast_fp16)[name = tensor<string, []>("op_209_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_210_cast_fp16 = softmax(axis = var_84, x = aw_21_cast_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_211_cast_fp16 = softmax(axis = var_84, x = aw_23_cast_fp16)[name = tensor<string, []>("op_211_cast_fp16")];
+            tensor<string, []> var_213_equation_0 = const()[name = tensor<string, []>("op_213_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_213_cast_fp16 = einsum(equation = var_213_equation_0, values = (var_163_cast_fp16_0, var_200_cast_fp16))[name = tensor<string, []>("op_213_cast_fp16")];
+            tensor<string, []> var_215_equation_0 = const()[name = tensor<string, []>("op_215_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_215_cast_fp16 = einsum(equation = var_215_equation_0, values = (var_163_cast_fp16_1, var_201_cast_fp16))[name = tensor<string, []>("op_215_cast_fp16")];
+            tensor<string, []> var_217_equation_0 = const()[name = tensor<string, []>("op_217_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_217_cast_fp16 = einsum(equation = var_217_equation_0, values = (var_163_cast_fp16_2, var_202_cast_fp16))[name = tensor<string, []>("op_217_cast_fp16")];
+            tensor<string, []> var_219_equation_0 = const()[name = tensor<string, []>("op_219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16 = einsum(equation = var_219_equation_0, values = (var_163_cast_fp16_3, var_203_cast_fp16))[name = tensor<string, []>("op_219_cast_fp16")];
+            tensor<string, []> var_221_equation_0 = const()[name = tensor<string, []>("op_221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_221_cast_fp16 = einsum(equation = var_221_equation_0, values = (var_163_cast_fp16_4, var_204_cast_fp16))[name = tensor<string, []>("op_221_cast_fp16")];
+            tensor<string, []> var_223_equation_0 = const()[name = tensor<string, []>("op_223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_223_cast_fp16 = einsum(equation = var_223_equation_0, values = (var_163_cast_fp16_5, var_205_cast_fp16))[name = tensor<string, []>("op_223_cast_fp16")];
+            tensor<string, []> var_225_equation_0 = const()[name = tensor<string, []>("op_225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_225_cast_fp16 = einsum(equation = var_225_equation_0, values = (var_163_cast_fp16_6, var_206_cast_fp16))[name = tensor<string, []>("op_225_cast_fp16")];
+            tensor<string, []> var_227_equation_0 = const()[name = tensor<string, []>("op_227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_227_cast_fp16 = einsum(equation = var_227_equation_0, values = (var_163_cast_fp16_7, var_207_cast_fp16))[name = tensor<string, []>("op_227_cast_fp16")];
+            tensor<string, []> var_229_equation_0 = const()[name = tensor<string, []>("op_229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_229_cast_fp16 = einsum(equation = var_229_equation_0, values = (var_163_cast_fp16_8, var_208_cast_fp16))[name = tensor<string, []>("op_229_cast_fp16")];
+            tensor<string, []> var_231_equation_0 = const()[name = tensor<string, []>("op_231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_231_cast_fp16 = einsum(equation = var_231_equation_0, values = (var_163_cast_fp16_9, var_209_cast_fp16))[name = tensor<string, []>("op_231_cast_fp16")];
+            tensor<string, []> var_233_equation_0 = const()[name = tensor<string, []>("op_233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_233_cast_fp16 = einsum(equation = var_233_equation_0, values = (var_163_cast_fp16_10, var_210_cast_fp16))[name = tensor<string, []>("op_233_cast_fp16")];
+            tensor<string, []> var_235_equation_0 = const()[name = tensor<string, []>("op_235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_235_cast_fp16 = einsum(equation = var_235_equation_0, values = (var_163_cast_fp16_11, var_211_cast_fp16))[name = tensor<string, []>("op_235_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_5_cast_fp16 = concat(axis = var_84, interleave = input_5_interleave_0, values = (var_213_cast_fp16, var_215_cast_fp16, var_217_cast_fp16, var_219_cast_fp16, var_221_cast_fp16, var_223_cast_fp16, var_225_cast_fp16, var_227_cast_fp16, var_229_cast_fp16, var_231_cast_fp16, var_233_cast_fp16, var_235_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_244_pad_type_0 = const()[name = tensor<string, []>("op_244_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_244_strides_0 = const()[name = tensor<string, []>("op_244_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_244_pad_0 = const()[name = tensor<string, []>("op_244_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_244_dilations_0 = const()[name = tensor<string, []>("op_244_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_244_groups_0 = const()[name = tensor<string, []>("op_244_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9760576)))];
+            tensor<fp16, [768]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10940288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_244_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_244_dilations_0, groups = var_244_groups_0, pad = var_244_pad_0, pad_type = var_244_pad_type_0, strides = var_244_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_244_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10941888)))];
+            tensor<fp16, [768]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10943488)))];
+            tensor<fp16, []> var_254_to_fp16 = const()[name = tensor<string, []>("op_254_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_254_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10945088)))];
+            tensor<fp16, [3072]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15663744)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_280_pad_type_0 = const()[name = tensor<string, []>("op_280_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_280_strides_0 = const()[name = tensor<string, []>("op_280_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_280_pad_0 = const()[name = tensor<string, []>("op_280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_280_dilations_0 = const()[name = tensor<string, []>("op_280_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_280_groups_0 = const()[name = tensor<string, []>("op_280_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15669952)))];
+            tensor<fp16, [768]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20388608)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_280_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_280_dilations_0, groups = var_280_groups_0, pad = var_280_pad_0, pad_type = var_280_pad_type_0, strides = var_280_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_280_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_289 = const()[name = tensor<string, []>("op_289"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20390208)))];
+            tensor<fp16, [768]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20391808)))];
+            tensor<fp16, []> var_305_to_fp16 = const()[name = tensor<string, []>("op_305_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_305_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_340_weight_0_to_fp16 = const()[name = tensor<string, []>("op_340_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20393408)))];
+            tensor<fp16, [768]> var_340_bias_0_to_fp16 = const()[name = tensor<string, []>("op_340_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21573120)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_340_cast_fp16 = conv(bias = var_340_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_340_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_340_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21574720)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_338_pad_type_0 = const()[name = tensor<string, []>("op_338_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_338_strides_0 = const()[name = tensor<string, []>("op_338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_338_pad_0 = const()[name = tensor<string, []>("op_338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_338_dilations_0 = const()[name = tensor<string, []>("op_338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_338_groups_0 = const()[name = tensor<string, []>("op_338_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22754432)))];
+            tensor<fp16, [768]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23934144)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_338_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_338_cast_fp16")];
+            tensor<int32, [12]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_341_axis_0 = const()[name = tensor<string, []>("op_341_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_11 = split(axis = var_341_axis_0, split_sizes = tile_3, x = var_340_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
+            tensor<int32, [4]> var_354_perm_0 = const()[name = tensor<string, []>("op_354_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_355_axis_0 = const()[name = tensor<string, []>("op_355_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_354_cast_fp16 = transpose(perm = var_354_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_11 = split(axis = var_355_axis_0, split_sizes = tile_4, x = var_354_cast_fp16)[name = tensor<string, []>("op_355_cast_fp16")];
+            tensor<int32, [12]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_368_axis_0 = const()[name = tensor<string, []>("op_368_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_11 = split(axis = var_368_axis_0, split_sizes = tile_5, x = var_338_cast_fp16)[name = tensor<string, []>("op_368_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_355_cast_fp16_0, var_341_cast_fp16_0))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_355_cast_fp16_1, var_341_cast_fp16_1))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_355_cast_fp16_2, var_341_cast_fp16_2))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_355_cast_fp16_3, var_341_cast_fp16_3))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_355_cast_fp16_4, var_341_cast_fp16_4))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_355_cast_fp16_5, var_341_cast_fp16_5))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_355_cast_fp16_6, var_341_cast_fp16_6))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_355_cast_fp16_7, var_341_cast_fp16_7))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_355_cast_fp16_8, var_341_cast_fp16_8))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_355_cast_fp16_9, var_341_cast_fp16_9))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_355_cast_fp16_10, var_341_cast_fp16_10))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_355_cast_fp16_11, var_341_cast_fp16_11))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_405_cast_fp16 = softmax(axis = var_289, x = aw_25_cast_fp16)[name = tensor<string, []>("op_405_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_406_cast_fp16 = softmax(axis = var_289, x = aw_27_cast_fp16)[name = tensor<string, []>("op_406_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_407_cast_fp16 = softmax(axis = var_289, x = aw_29_cast_fp16)[name = tensor<string, []>("op_407_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_408_cast_fp16 = softmax(axis = var_289, x = aw_31_cast_fp16)[name = tensor<string, []>("op_408_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_409_cast_fp16 = softmax(axis = var_289, x = aw_33_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_410_cast_fp16 = softmax(axis = var_289, x = aw_35_cast_fp16)[name = tensor<string, []>("op_410_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_411_cast_fp16 = softmax(axis = var_289, x = aw_37_cast_fp16)[name = tensor<string, []>("op_411_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_412_cast_fp16 = softmax(axis = var_289, x = aw_39_cast_fp16)[name = tensor<string, []>("op_412_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_413_cast_fp16 = softmax(axis = var_289, x = aw_41_cast_fp16)[name = tensor<string, []>("op_413_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_414_cast_fp16 = softmax(axis = var_289, x = aw_43_cast_fp16)[name = tensor<string, []>("op_414_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_415_cast_fp16 = softmax(axis = var_289, x = aw_45_cast_fp16)[name = tensor<string, []>("op_415_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_416_cast_fp16 = softmax(axis = var_289, x = aw_47_cast_fp16)[name = tensor<string, []>("op_416_cast_fp16")];
+            tensor<string, []> var_418_equation_0 = const()[name = tensor<string, []>("op_418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_418_cast_fp16 = einsum(equation = var_418_equation_0, values = (var_368_cast_fp16_0, var_405_cast_fp16))[name = tensor<string, []>("op_418_cast_fp16")];
+            tensor<string, []> var_420_equation_0 = const()[name = tensor<string, []>("op_420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_420_cast_fp16 = einsum(equation = var_420_equation_0, values = (var_368_cast_fp16_1, var_406_cast_fp16))[name = tensor<string, []>("op_420_cast_fp16")];
+            tensor<string, []> var_422_equation_0 = const()[name = tensor<string, []>("op_422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_422_cast_fp16 = einsum(equation = var_422_equation_0, values = (var_368_cast_fp16_2, var_407_cast_fp16))[name = tensor<string, []>("op_422_cast_fp16")];
+            tensor<string, []> var_424_equation_0 = const()[name = tensor<string, []>("op_424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_424_cast_fp16 = einsum(equation = var_424_equation_0, values = (var_368_cast_fp16_3, var_408_cast_fp16))[name = tensor<string, []>("op_424_cast_fp16")];
+            tensor<string, []> var_426_equation_0 = const()[name = tensor<string, []>("op_426_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_426_cast_fp16 = einsum(equation = var_426_equation_0, values = (var_368_cast_fp16_4, var_409_cast_fp16))[name = tensor<string, []>("op_426_cast_fp16")];
+            tensor<string, []> var_428_equation_0 = const()[name = tensor<string, []>("op_428_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_428_cast_fp16 = einsum(equation = var_428_equation_0, values = (var_368_cast_fp16_5, var_410_cast_fp16))[name = tensor<string, []>("op_428_cast_fp16")];
+            tensor<string, []> var_430_equation_0 = const()[name = tensor<string, []>("op_430_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_430_cast_fp16 = einsum(equation = var_430_equation_0, values = (var_368_cast_fp16_6, var_411_cast_fp16))[name = tensor<string, []>("op_430_cast_fp16")];
+            tensor<string, []> var_432_equation_0 = const()[name = tensor<string, []>("op_432_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16 = einsum(equation = var_432_equation_0, values = (var_368_cast_fp16_7, var_412_cast_fp16))[name = tensor<string, []>("op_432_cast_fp16")];
+            tensor<string, []> var_434_equation_0 = const()[name = tensor<string, []>("op_434_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16 = einsum(equation = var_434_equation_0, values = (var_368_cast_fp16_8, var_413_cast_fp16))[name = tensor<string, []>("op_434_cast_fp16")];
+            tensor<string, []> var_436_equation_0 = const()[name = tensor<string, []>("op_436_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_436_cast_fp16 = einsum(equation = var_436_equation_0, values = (var_368_cast_fp16_9, var_414_cast_fp16))[name = tensor<string, []>("op_436_cast_fp16")];
+            tensor<string, []> var_438_equation_0 = const()[name = tensor<string, []>("op_438_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_438_cast_fp16 = einsum(equation = var_438_equation_0, values = (var_368_cast_fp16_10, var_415_cast_fp16))[name = tensor<string, []>("op_438_cast_fp16")];
+            tensor<string, []> var_440_equation_0 = const()[name = tensor<string, []>("op_440_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_440_cast_fp16 = einsum(equation = var_440_equation_0, values = (var_368_cast_fp16_11, var_416_cast_fp16))[name = tensor<string, []>("op_440_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_15_cast_fp16 = concat(axis = var_289, interleave = input_15_interleave_0, values = (var_418_cast_fp16, var_420_cast_fp16, var_422_cast_fp16, var_424_cast_fp16, var_426_cast_fp16, var_428_cast_fp16, var_430_cast_fp16, var_432_cast_fp16, var_434_cast_fp16, var_436_cast_fp16, var_438_cast_fp16, var_440_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_449_pad_type_0 = const()[name = tensor<string, []>("op_449_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_449_strides_0 = const()[name = tensor<string, []>("op_449_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_449_pad_0 = const()[name = tensor<string, []>("op_449_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_449_dilations_0 = const()[name = tensor<string, []>("op_449_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_449_groups_0 = const()[name = tensor<string, []>("op_449_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23935744)))];
+            tensor<fp16, [768]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25115456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_449_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_449_dilations_0, groups = var_449_groups_0, pad = var_449_pad_0, pad_type = var_449_pad_type_0, strides = var_449_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_449_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_449_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25117056)))];
+            tensor<fp16, [768]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25118656)))];
+            tensor<fp16, []> var_459_to_fp16 = const()[name = tensor<string, []>("op_459_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_459_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25120256)))];
+            tensor<fp16, [3072]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29838912)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_485_pad_type_0 = const()[name = tensor<string, []>("op_485_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_485_strides_0 = const()[name = tensor<string, []>("op_485_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_485_pad_0 = const()[name = tensor<string, []>("op_485_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_485_dilations_0 = const()[name = tensor<string, []>("op_485_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_485_groups_0 = const()[name = tensor<string, []>("op_485_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29845120)))];
+            tensor<fp16, [768]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34563776)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_485_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_485_dilations_0, groups = var_485_groups_0, pad = var_485_pad_0, pad_type = var_485_pad_type_0, strides = var_485_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_485_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_494 = const()[name = tensor<string, []>("op_494"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34565376)))];
+            tensor<fp16, [768]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34566976)))];
+            tensor<fp16, []> var_510_to_fp16 = const()[name = tensor<string, []>("op_510_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_510_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_545_weight_0_to_fp16 = const()[name = tensor<string, []>("op_545_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34568576)))];
+            tensor<fp16, [768]> var_545_bias_0_to_fp16 = const()[name = tensor<string, []>("op_545_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35748288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_545_cast_fp16 = conv(bias = var_545_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_545_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35749888)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_543_pad_type_0 = const()[name = tensor<string, []>("op_543_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_543_strides_0 = const()[name = tensor<string, []>("op_543_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_543_pad_0 = const()[name = tensor<string, []>("op_543_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_543_dilations_0 = const()[name = tensor<string, []>("op_543_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_543_groups_0 = const()[name = tensor<string, []>("op_543_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36929600)))];
+            tensor<fp16, [768]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38109312)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_543_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_543_dilations_0, groups = var_543_groups_0, pad = var_543_pad_0, pad_type = var_543_pad_type_0, strides = var_543_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_543_cast_fp16")];
+            tensor<int32, [12]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_546_axis_0 = const()[name = tensor<string, []>("op_546_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_11 = split(axis = var_546_axis_0, split_sizes = tile_6, x = var_545_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
+            tensor<int32, [4]> var_559_perm_0 = const()[name = tensor<string, []>("op_559_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_560_axis_0 = const()[name = tensor<string, []>("op_560_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_559_cast_fp16 = transpose(perm = var_559_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_11 = split(axis = var_560_axis_0, split_sizes = tile_7, x = var_559_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<int32, [12]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_573_axis_0 = const()[name = tensor<string, []>("op_573_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_11 = split(axis = var_573_axis_0, split_sizes = tile_8, x = var_543_cast_fp16)[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_560_cast_fp16_0, var_546_cast_fp16_0))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_560_cast_fp16_1, var_546_cast_fp16_1))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_560_cast_fp16_2, var_546_cast_fp16_2))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_560_cast_fp16_3, var_546_cast_fp16_3))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_560_cast_fp16_4, var_546_cast_fp16_4))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_560_cast_fp16_5, var_546_cast_fp16_5))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_560_cast_fp16_6, var_546_cast_fp16_6))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_560_cast_fp16_7, var_546_cast_fp16_7))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_560_cast_fp16_8, var_546_cast_fp16_8))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_560_cast_fp16_9, var_546_cast_fp16_9))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_560_cast_fp16_10, var_546_cast_fp16_10))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_560_cast_fp16_11, var_546_cast_fp16_11))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_610_cast_fp16 = softmax(axis = var_494, x = aw_49_cast_fp16)[name = tensor<string, []>("op_610_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_611_cast_fp16 = softmax(axis = var_494, x = aw_51_cast_fp16)[name = tensor<string, []>("op_611_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_612_cast_fp16 = softmax(axis = var_494, x = aw_53_cast_fp16)[name = tensor<string, []>("op_612_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_613_cast_fp16 = softmax(axis = var_494, x = aw_55_cast_fp16)[name = tensor<string, []>("op_613_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_614_cast_fp16 = softmax(axis = var_494, x = aw_57_cast_fp16)[name = tensor<string, []>("op_614_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_615_cast_fp16 = softmax(axis = var_494, x = aw_59_cast_fp16)[name = tensor<string, []>("op_615_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_616_cast_fp16 = softmax(axis = var_494, x = aw_61_cast_fp16)[name = tensor<string, []>("op_616_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_617_cast_fp16 = softmax(axis = var_494, x = aw_63_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_618_cast_fp16 = softmax(axis = var_494, x = aw_65_cast_fp16)[name = tensor<string, []>("op_618_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_619_cast_fp16 = softmax(axis = var_494, x = aw_67_cast_fp16)[name = tensor<string, []>("op_619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_620_cast_fp16 = softmax(axis = var_494, x = aw_69_cast_fp16)[name = tensor<string, []>("op_620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_621_cast_fp16 = softmax(axis = var_494, x = aw_71_cast_fp16)[name = tensor<string, []>("op_621_cast_fp16")];
+            tensor<string, []> var_623_equation_0 = const()[name = tensor<string, []>("op_623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_573_cast_fp16_0, var_610_cast_fp16))[name = tensor<string, []>("op_623_cast_fp16")];
+            tensor<string, []> var_625_equation_0 = const()[name = tensor<string, []>("op_625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_573_cast_fp16_1, var_611_cast_fp16))[name = tensor<string, []>("op_625_cast_fp16")];
+            tensor<string, []> var_627_equation_0 = const()[name = tensor<string, []>("op_627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_573_cast_fp16_2, var_612_cast_fp16))[name = tensor<string, []>("op_627_cast_fp16")];
+            tensor<string, []> var_629_equation_0 = const()[name = tensor<string, []>("op_629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_573_cast_fp16_3, var_613_cast_fp16))[name = tensor<string, []>("op_629_cast_fp16")];
+            tensor<string, []> var_631_equation_0 = const()[name = tensor<string, []>("op_631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_573_cast_fp16_4, var_614_cast_fp16))[name = tensor<string, []>("op_631_cast_fp16")];
+            tensor<string, []> var_633_equation_0 = const()[name = tensor<string, []>("op_633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_573_cast_fp16_5, var_615_cast_fp16))[name = tensor<string, []>("op_633_cast_fp16")];
+            tensor<string, []> var_635_equation_0 = const()[name = tensor<string, []>("op_635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_573_cast_fp16_6, var_616_cast_fp16))[name = tensor<string, []>("op_635_cast_fp16")];
+            tensor<string, []> var_637_equation_0 = const()[name = tensor<string, []>("op_637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_573_cast_fp16_7, var_617_cast_fp16))[name = tensor<string, []>("op_637_cast_fp16")];
+            tensor<string, []> var_639_equation_0 = const()[name = tensor<string, []>("op_639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_573_cast_fp16_8, var_618_cast_fp16))[name = tensor<string, []>("op_639_cast_fp16")];
+            tensor<string, []> var_641_equation_0 = const()[name = tensor<string, []>("op_641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_573_cast_fp16_9, var_619_cast_fp16))[name = tensor<string, []>("op_641_cast_fp16")];
+            tensor<string, []> var_643_equation_0 = const()[name = tensor<string, []>("op_643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_573_cast_fp16_10, var_620_cast_fp16))[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<string, []> var_645_equation_0 = const()[name = tensor<string, []>("op_645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_573_cast_fp16_11, var_621_cast_fp16))[name = tensor<string, []>("op_645_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_25_cast_fp16 = concat(axis = var_494, interleave = input_25_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_654_pad_type_0 = const()[name = tensor<string, []>("op_654_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_654_strides_0 = const()[name = tensor<string, []>("op_654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_654_pad_0 = const()[name = tensor<string, []>("op_654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_654_dilations_0 = const()[name = tensor<string, []>("op_654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_654_groups_0 = const()[name = tensor<string, []>("op_654_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38110912)))];
+            tensor<fp16, [768]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39290624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_654_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_654_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_654_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39292224)))];
+            tensor<fp16, [768]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39293824)))];
+            tensor<fp16, []> var_664_to_fp16 = const()[name = tensor<string, []>("op_664_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_664_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39295424)))];
+            tensor<fp16, [3072]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44014080)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_690_pad_type_0 = const()[name = tensor<string, []>("op_690_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_690_strides_0 = const()[name = tensor<string, []>("op_690_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_690_pad_0 = const()[name = tensor<string, []>("op_690_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_690_dilations_0 = const()[name = tensor<string, []>("op_690_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_690_groups_0 = const()[name = tensor<string, []>("op_690_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44020288)))];
+            tensor<fp16, [768]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48738944)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_690_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_690_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_690_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_699 = const()[name = tensor<string, []>("op_699"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48740544)))];
+            tensor<fp16, [768]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48742144)))];
+            tensor<fp16, []> var_715_to_fp16 = const()[name = tensor<string, []>("op_715_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_715_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_750_weight_0_to_fp16 = const()[name = tensor<string, []>("op_750_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48743744)))];
+            tensor<fp16, [768]> var_750_bias_0_to_fp16 = const()[name = tensor<string, []>("op_750_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49923456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_750_cast_fp16 = conv(bias = var_750_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_750_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_750_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49925056)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_748_pad_type_0 = const()[name = tensor<string, []>("op_748_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_748_strides_0 = const()[name = tensor<string, []>("op_748_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_748_pad_0 = const()[name = tensor<string, []>("op_748_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_748_dilations_0 = const()[name = tensor<string, []>("op_748_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_748_groups_0 = const()[name = tensor<string, []>("op_748_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51104768)))];
+            tensor<fp16, [768]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52284480)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_748_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_748_dilations_0, groups = var_748_groups_0, pad = var_748_pad_0, pad_type = var_748_pad_type_0, strides = var_748_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_748_cast_fp16")];
+            tensor<int32, [12]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_751_axis_0 = const()[name = tensor<string, []>("op_751_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_11 = split(axis = var_751_axis_0, split_sizes = tile_9, x = var_750_cast_fp16)[name = tensor<string, []>("op_751_cast_fp16")];
+            tensor<int32, [4]> var_764_perm_0 = const()[name = tensor<string, []>("op_764_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_765_axis_0 = const()[name = tensor<string, []>("op_765_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_764_cast_fp16 = transpose(perm = var_764_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_11 = split(axis = var_765_axis_0, split_sizes = tile_10, x = var_764_cast_fp16)[name = tensor<string, []>("op_765_cast_fp16")];
+            tensor<int32, [12]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_778_axis_0 = const()[name = tensor<string, []>("op_778_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_11 = split(axis = var_778_axis_0, split_sizes = tile_11, x = var_748_cast_fp16)[name = tensor<string, []>("op_778_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_765_cast_fp16_0, var_751_cast_fp16_0))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_765_cast_fp16_1, var_751_cast_fp16_1))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_765_cast_fp16_2, var_751_cast_fp16_2))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_765_cast_fp16_3, var_751_cast_fp16_3))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_765_cast_fp16_4, var_751_cast_fp16_4))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_765_cast_fp16_5, var_751_cast_fp16_5))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_765_cast_fp16_6, var_751_cast_fp16_6))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_765_cast_fp16_7, var_751_cast_fp16_7))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_765_cast_fp16_8, var_751_cast_fp16_8))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_765_cast_fp16_9, var_751_cast_fp16_9))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_765_cast_fp16_10, var_751_cast_fp16_10))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_765_cast_fp16_11, var_751_cast_fp16_11))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_815_cast_fp16 = softmax(axis = var_699, x = aw_73_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_816_cast_fp16 = softmax(axis = var_699, x = aw_75_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_817_cast_fp16 = softmax(axis = var_699, x = aw_77_cast_fp16)[name = tensor<string, []>("op_817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_818_cast_fp16 = softmax(axis = var_699, x = aw_79_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_819_cast_fp16 = softmax(axis = var_699, x = aw_81_cast_fp16)[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_820_cast_fp16 = softmax(axis = var_699, x = aw_83_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_821_cast_fp16 = softmax(axis = var_699, x = aw_85_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_822_cast_fp16 = softmax(axis = var_699, x = aw_87_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_823_cast_fp16 = softmax(axis = var_699, x = aw_89_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_824_cast_fp16 = softmax(axis = var_699, x = aw_91_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_825_cast_fp16 = softmax(axis = var_699, x = aw_93_cast_fp16)[name = tensor<string, []>("op_825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_826_cast_fp16 = softmax(axis = var_699, x = aw_95_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<string, []> var_828_equation_0 = const()[name = tensor<string, []>("op_828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_778_cast_fp16_0, var_815_cast_fp16))[name = tensor<string, []>("op_828_cast_fp16")];
+            tensor<string, []> var_830_equation_0 = const()[name = tensor<string, []>("op_830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_778_cast_fp16_1, var_816_cast_fp16))[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<string, []> var_832_equation_0 = const()[name = tensor<string, []>("op_832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_778_cast_fp16_2, var_817_cast_fp16))[name = tensor<string, []>("op_832_cast_fp16")];
+            tensor<string, []> var_834_equation_0 = const()[name = tensor<string, []>("op_834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_778_cast_fp16_3, var_818_cast_fp16))[name = tensor<string, []>("op_834_cast_fp16")];
+            tensor<string, []> var_836_equation_0 = const()[name = tensor<string, []>("op_836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_778_cast_fp16_4, var_819_cast_fp16))[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<string, []> var_838_equation_0 = const()[name = tensor<string, []>("op_838_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_778_cast_fp16_5, var_820_cast_fp16))[name = tensor<string, []>("op_838_cast_fp16")];
+            tensor<string, []> var_840_equation_0 = const()[name = tensor<string, []>("op_840_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_778_cast_fp16_6, var_821_cast_fp16))[name = tensor<string, []>("op_840_cast_fp16")];
+            tensor<string, []> var_842_equation_0 = const()[name = tensor<string, []>("op_842_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_778_cast_fp16_7, var_822_cast_fp16))[name = tensor<string, []>("op_842_cast_fp16")];
+            tensor<string, []> var_844_equation_0 = const()[name = tensor<string, []>("op_844_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_778_cast_fp16_8, var_823_cast_fp16))[name = tensor<string, []>("op_844_cast_fp16")];
+            tensor<string, []> var_846_equation_0 = const()[name = tensor<string, []>("op_846_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_778_cast_fp16_9, var_824_cast_fp16))[name = tensor<string, []>("op_846_cast_fp16")];
+            tensor<string, []> var_848_equation_0 = const()[name = tensor<string, []>("op_848_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_778_cast_fp16_10, var_825_cast_fp16))[name = tensor<string, []>("op_848_cast_fp16")];
+            tensor<string, []> var_850_equation_0 = const()[name = tensor<string, []>("op_850_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_850_cast_fp16 = einsum(equation = var_850_equation_0, values = (var_778_cast_fp16_11, var_826_cast_fp16))[name = tensor<string, []>("op_850_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_35_cast_fp16 = concat(axis = var_699, interleave = input_35_interleave_0, values = (var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_859_pad_type_0 = const()[name = tensor<string, []>("op_859_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_859_strides_0 = const()[name = tensor<string, []>("op_859_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_859_pad_0 = const()[name = tensor<string, []>("op_859_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_859_dilations_0 = const()[name = tensor<string, []>("op_859_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_859_groups_0 = const()[name = tensor<string, []>("op_859_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52286080)))];
+            tensor<fp16, [768]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53465792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_859_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_859_dilations_0, groups = var_859_groups_0, pad = var_859_pad_0, pad_type = var_859_pad_type_0, strides = var_859_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_859_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53467392)))];
+            tensor<fp16, [768]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53468992)))];
+            tensor<fp16, []> var_869_to_fp16 = const()[name = tensor<string, []>("op_869_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_869_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53470592)))];
+            tensor<fp16, [3072]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58189248)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_895_pad_type_0 = const()[name = tensor<string, []>("op_895_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_895_strides_0 = const()[name = tensor<string, []>("op_895_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_895_pad_0 = const()[name = tensor<string, []>("op_895_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_895_dilations_0 = const()[name = tensor<string, []>("op_895_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_895_groups_0 = const()[name = tensor<string, []>("op_895_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58195456)))];
+            tensor<fp16, [768]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62914112)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_895_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_895_dilations_0, groups = var_895_groups_0, pad = var_895_pad_0, pad_type = var_895_pad_type_0, strides = var_895_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_895_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_895_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_904 = const()[name = tensor<string, []>("op_904"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62915712)))];
+            tensor<fp16, [768]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62917312)))];
+            tensor<fp16, []> var_920_to_fp16 = const()[name = tensor<string, []>("op_920_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_920_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_955_weight_0_to_fp16 = const()[name = tensor<string, []>("op_955_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62918912)))];
+            tensor<fp16, [768]> var_955_bias_0_to_fp16 = const()[name = tensor<string, []>("op_955_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64098624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_955_cast_fp16 = conv(bias = var_955_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_955_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_955_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64100224)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_953_pad_type_0 = const()[name = tensor<string, []>("op_953_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_953_strides_0 = const()[name = tensor<string, []>("op_953_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_953_pad_0 = const()[name = tensor<string, []>("op_953_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_953_dilations_0 = const()[name = tensor<string, []>("op_953_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_953_groups_0 = const()[name = tensor<string, []>("op_953_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65279936)))];
+            tensor<fp16, [768]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66459648)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_953_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_953_dilations_0, groups = var_953_groups_0, pad = var_953_pad_0, pad_type = var_953_pad_type_0, strides = var_953_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_953_cast_fp16")];
+            tensor<int32, [12]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_956_axis_0 = const()[name = tensor<string, []>("op_956_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_11 = split(axis = var_956_axis_0, split_sizes = tile_12, x = var_955_cast_fp16)[name = tensor<string, []>("op_956_cast_fp16")];
+            tensor<int32, [4]> var_969_perm_0 = const()[name = tensor<string, []>("op_969_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_970_axis_0 = const()[name = tensor<string, []>("op_970_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_969_cast_fp16 = transpose(perm = var_969_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_11 = split(axis = var_970_axis_0, split_sizes = tile_13, x = var_969_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<int32, [12]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_983_axis_0 = const()[name = tensor<string, []>("op_983_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_11 = split(axis = var_983_axis_0, split_sizes = tile_14, x = var_953_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_970_cast_fp16_0, var_956_cast_fp16_0))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_970_cast_fp16_1, var_956_cast_fp16_1))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_970_cast_fp16_2, var_956_cast_fp16_2))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_970_cast_fp16_3, var_956_cast_fp16_3))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_970_cast_fp16_4, var_956_cast_fp16_4))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_970_cast_fp16_5, var_956_cast_fp16_5))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_970_cast_fp16_6, var_956_cast_fp16_6))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_970_cast_fp16_7, var_956_cast_fp16_7))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_970_cast_fp16_8, var_956_cast_fp16_8))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_970_cast_fp16_9, var_956_cast_fp16_9))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_970_cast_fp16_10, var_956_cast_fp16_10))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_970_cast_fp16_11, var_956_cast_fp16_11))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1020_cast_fp16 = softmax(axis = var_904, x = aw_97_cast_fp16)[name = tensor<string, []>("op_1020_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1021_cast_fp16 = softmax(axis = var_904, x = aw_99_cast_fp16)[name = tensor<string, []>("op_1021_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1022_cast_fp16 = softmax(axis = var_904, x = aw_101_cast_fp16)[name = tensor<string, []>("op_1022_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1023_cast_fp16 = softmax(axis = var_904, x = aw_103_cast_fp16)[name = tensor<string, []>("op_1023_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1024_cast_fp16 = softmax(axis = var_904, x = aw_105_cast_fp16)[name = tensor<string, []>("op_1024_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1025_cast_fp16 = softmax(axis = var_904, x = aw_107_cast_fp16)[name = tensor<string, []>("op_1025_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1026_cast_fp16 = softmax(axis = var_904, x = aw_109_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1027_cast_fp16 = softmax(axis = var_904, x = aw_111_cast_fp16)[name = tensor<string, []>("op_1027_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1028_cast_fp16 = softmax(axis = var_904, x = aw_113_cast_fp16)[name = tensor<string, []>("op_1028_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1029_cast_fp16 = softmax(axis = var_904, x = aw_115_cast_fp16)[name = tensor<string, []>("op_1029_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1030_cast_fp16 = softmax(axis = var_904, x = aw_117_cast_fp16)[name = tensor<string, []>("op_1030_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1031_cast_fp16 = softmax(axis = var_904, x = aw_119_cast_fp16)[name = tensor<string, []>("op_1031_cast_fp16")];
+            tensor<string, []> var_1033_equation_0 = const()[name = tensor<string, []>("op_1033_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1033_cast_fp16 = einsum(equation = var_1033_equation_0, values = (var_983_cast_fp16_0, var_1020_cast_fp16))[name = tensor<string, []>("op_1033_cast_fp16")];
+            tensor<string, []> var_1035_equation_0 = const()[name = tensor<string, []>("op_1035_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1035_cast_fp16 = einsum(equation = var_1035_equation_0, values = (var_983_cast_fp16_1, var_1021_cast_fp16))[name = tensor<string, []>("op_1035_cast_fp16")];
+            tensor<string, []> var_1037_equation_0 = const()[name = tensor<string, []>("op_1037_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1037_cast_fp16 = einsum(equation = var_1037_equation_0, values = (var_983_cast_fp16_2, var_1022_cast_fp16))[name = tensor<string, []>("op_1037_cast_fp16")];
+            tensor<string, []> var_1039_equation_0 = const()[name = tensor<string, []>("op_1039_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1039_cast_fp16 = einsum(equation = var_1039_equation_0, values = (var_983_cast_fp16_3, var_1023_cast_fp16))[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<string, []> var_1041_equation_0 = const()[name = tensor<string, []>("op_1041_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1041_cast_fp16 = einsum(equation = var_1041_equation_0, values = (var_983_cast_fp16_4, var_1024_cast_fp16))[name = tensor<string, []>("op_1041_cast_fp16")];
+            tensor<string, []> var_1043_equation_0 = const()[name = tensor<string, []>("op_1043_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1043_cast_fp16 = einsum(equation = var_1043_equation_0, values = (var_983_cast_fp16_5, var_1025_cast_fp16))[name = tensor<string, []>("op_1043_cast_fp16")];
+            tensor<string, []> var_1045_equation_0 = const()[name = tensor<string, []>("op_1045_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1045_cast_fp16 = einsum(equation = var_1045_equation_0, values = (var_983_cast_fp16_6, var_1026_cast_fp16))[name = tensor<string, []>("op_1045_cast_fp16")];
+            tensor<string, []> var_1047_equation_0 = const()[name = tensor<string, []>("op_1047_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1047_cast_fp16 = einsum(equation = var_1047_equation_0, values = (var_983_cast_fp16_7, var_1027_cast_fp16))[name = tensor<string, []>("op_1047_cast_fp16")];
+            tensor<string, []> var_1049_equation_0 = const()[name = tensor<string, []>("op_1049_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1049_cast_fp16 = einsum(equation = var_1049_equation_0, values = (var_983_cast_fp16_8, var_1028_cast_fp16))[name = tensor<string, []>("op_1049_cast_fp16")];
+            tensor<string, []> var_1051_equation_0 = const()[name = tensor<string, []>("op_1051_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1051_cast_fp16 = einsum(equation = var_1051_equation_0, values = (var_983_cast_fp16_9, var_1029_cast_fp16))[name = tensor<string, []>("op_1051_cast_fp16")];
+            tensor<string, []> var_1053_equation_0 = const()[name = tensor<string, []>("op_1053_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1053_cast_fp16 = einsum(equation = var_1053_equation_0, values = (var_983_cast_fp16_10, var_1030_cast_fp16))[name = tensor<string, []>("op_1053_cast_fp16")];
+            tensor<string, []> var_1055_equation_0 = const()[name = tensor<string, []>("op_1055_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1055_cast_fp16 = einsum(equation = var_1055_equation_0, values = (var_983_cast_fp16_11, var_1031_cast_fp16))[name = tensor<string, []>("op_1055_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_45_cast_fp16 = concat(axis = var_904, interleave = input_45_interleave_0, values = (var_1033_cast_fp16, var_1035_cast_fp16, var_1037_cast_fp16, var_1039_cast_fp16, var_1041_cast_fp16, var_1043_cast_fp16, var_1045_cast_fp16, var_1047_cast_fp16, var_1049_cast_fp16, var_1051_cast_fp16, var_1053_cast_fp16, var_1055_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1064_pad_type_0 = const()[name = tensor<string, []>("op_1064_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1064_strides_0 = const()[name = tensor<string, []>("op_1064_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1064_pad_0 = const()[name = tensor<string, []>("op_1064_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1064_dilations_0 = const()[name = tensor<string, []>("op_1064_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1064_groups_0 = const()[name = tensor<string, []>("op_1064_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66461248)))];
+            tensor<fp16, [768]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67640960)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1064_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1064_dilations_0, groups = var_1064_groups_0, pad = var_1064_pad_0, pad_type = var_1064_pad_type_0, strides = var_1064_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1064_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67642560)))];
+            tensor<fp16, [768]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67644160)))];
+            tensor<fp16, []> var_1074_to_fp16 = const()[name = tensor<string, []>("op_1074_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1074_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67645760)))];
+            tensor<fp16, [3072]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72364416)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1100_pad_type_0 = const()[name = tensor<string, []>("op_1100_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1100_strides_0 = const()[name = tensor<string, []>("op_1100_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1100_pad_0 = const()[name = tensor<string, []>("op_1100_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1100_dilations_0 = const()[name = tensor<string, []>("op_1100_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1100_groups_0 = const()[name = tensor<string, []>("op_1100_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72370624)))];
+            tensor<fp16, [768]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77089280)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1100_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1100_dilations_0, groups = var_1100_groups_0, pad = var_1100_pad_0, pad_type = var_1100_pad_type_0, strides = var_1100_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1100_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1109 = const()[name = tensor<string, []>("op_1109"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77090880)))];
+            tensor<fp16, [768]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77092480)))];
+            tensor<fp16, []> var_1125_to_fp16 = const()[name = tensor<string, []>("op_1125_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1125_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1160_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1160_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77094080)))];
+            tensor<fp16, [768]> var_1160_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1160_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78273792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1160_cast_fp16 = conv(bias = var_1160_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1160_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1160_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78275392)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1158_pad_type_0 = const()[name = tensor<string, []>("op_1158_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1158_strides_0 = const()[name = tensor<string, []>("op_1158_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1158_pad_0 = const()[name = tensor<string, []>("op_1158_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1158_dilations_0 = const()[name = tensor<string, []>("op_1158_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1158_groups_0 = const()[name = tensor<string, []>("op_1158_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79455104)))];
+            tensor<fp16, [768]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80634816)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1158_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1158_cast_fp16")];
+            tensor<int32, [12]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1161_axis_0 = const()[name = tensor<string, []>("op_1161_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_11 = split(axis = var_1161_axis_0, split_sizes = tile_15, x = var_1160_cast_fp16)[name = tensor<string, []>("op_1161_cast_fp16")];
+            tensor<int32, [4]> var_1174_perm_0 = const()[name = tensor<string, []>("op_1174_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1175_axis_0 = const()[name = tensor<string, []>("op_1175_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1174_cast_fp16 = transpose(perm = var_1174_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_11 = split(axis = var_1175_axis_0, split_sizes = tile_16, x = var_1174_cast_fp16)[name = tensor<string, []>("op_1175_cast_fp16")];
+            tensor<int32, [12]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1188_axis_0 = const()[name = tensor<string, []>("op_1188_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_11 = split(axis = var_1188_axis_0, split_sizes = tile_17, x = var_1158_cast_fp16)[name = tensor<string, []>("op_1188_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_1175_cast_fp16_0, var_1161_cast_fp16_0))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_1175_cast_fp16_1, var_1161_cast_fp16_1))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_1175_cast_fp16_2, var_1161_cast_fp16_2))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_1175_cast_fp16_3, var_1161_cast_fp16_3))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1175_cast_fp16_4, var_1161_cast_fp16_4))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1175_cast_fp16_5, var_1161_cast_fp16_5))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1175_cast_fp16_6, var_1161_cast_fp16_6))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1175_cast_fp16_7, var_1161_cast_fp16_7))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1175_cast_fp16_8, var_1161_cast_fp16_8))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1175_cast_fp16_9, var_1161_cast_fp16_9))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1175_cast_fp16_10, var_1161_cast_fp16_10))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1175_cast_fp16_11, var_1161_cast_fp16_11))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1225_cast_fp16 = softmax(axis = var_1109, x = aw_121_cast_fp16)[name = tensor<string, []>("op_1225_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1226_cast_fp16 = softmax(axis = var_1109, x = aw_123_cast_fp16)[name = tensor<string, []>("op_1226_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1227_cast_fp16 = softmax(axis = var_1109, x = aw_125_cast_fp16)[name = tensor<string, []>("op_1227_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1228_cast_fp16 = softmax(axis = var_1109, x = aw_127_cast_fp16)[name = tensor<string, []>("op_1228_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1229_cast_fp16 = softmax(axis = var_1109, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1229_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1230_cast_fp16 = softmax(axis = var_1109, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1230_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1231_cast_fp16 = softmax(axis = var_1109, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1231_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1232_cast_fp16 = softmax(axis = var_1109, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1232_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1233_cast_fp16 = softmax(axis = var_1109, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1233_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1234_cast_fp16 = softmax(axis = var_1109, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1234_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1235_cast_fp16 = softmax(axis = var_1109, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1235_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1236_cast_fp16 = softmax(axis = var_1109, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1236_cast_fp16")];
+            tensor<string, []> var_1238_equation_0 = const()[name = tensor<string, []>("op_1238_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1188_cast_fp16_0, var_1225_cast_fp16))[name = tensor<string, []>("op_1238_cast_fp16")];
+            tensor<string, []> var_1240_equation_0 = const()[name = tensor<string, []>("op_1240_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1188_cast_fp16_1, var_1226_cast_fp16))[name = tensor<string, []>("op_1240_cast_fp16")];
+            tensor<string, []> var_1242_equation_0 = const()[name = tensor<string, []>("op_1242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1188_cast_fp16_2, var_1227_cast_fp16))[name = tensor<string, []>("op_1242_cast_fp16")];
+            tensor<string, []> var_1244_equation_0 = const()[name = tensor<string, []>("op_1244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1188_cast_fp16_3, var_1228_cast_fp16))[name = tensor<string, []>("op_1244_cast_fp16")];
+            tensor<string, []> var_1246_equation_0 = const()[name = tensor<string, []>("op_1246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1188_cast_fp16_4, var_1229_cast_fp16))[name = tensor<string, []>("op_1246_cast_fp16")];
+            tensor<string, []> var_1248_equation_0 = const()[name = tensor<string, []>("op_1248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1188_cast_fp16_5, var_1230_cast_fp16))[name = tensor<string, []>("op_1248_cast_fp16")];
+            tensor<string, []> var_1250_equation_0 = const()[name = tensor<string, []>("op_1250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1188_cast_fp16_6, var_1231_cast_fp16))[name = tensor<string, []>("op_1250_cast_fp16")];
+            tensor<string, []> var_1252_equation_0 = const()[name = tensor<string, []>("op_1252_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1188_cast_fp16_7, var_1232_cast_fp16))[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<string, []> var_1254_equation_0 = const()[name = tensor<string, []>("op_1254_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1254_cast_fp16 = einsum(equation = var_1254_equation_0, values = (var_1188_cast_fp16_8, var_1233_cast_fp16))[name = tensor<string, []>("op_1254_cast_fp16")];
+            tensor<string, []> var_1256_equation_0 = const()[name = tensor<string, []>("op_1256_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1188_cast_fp16_9, var_1234_cast_fp16))[name = tensor<string, []>("op_1256_cast_fp16")];
+            tensor<string, []> var_1258_equation_0 = const()[name = tensor<string, []>("op_1258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_1188_cast_fp16_10, var_1235_cast_fp16))[name = tensor<string, []>("op_1258_cast_fp16")];
+            tensor<string, []> var_1260_equation_0 = const()[name = tensor<string, []>("op_1260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1188_cast_fp16_11, var_1236_cast_fp16))[name = tensor<string, []>("op_1260_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1109, interleave = input_55_interleave_0, values = (var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16, var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16, var_1252_cast_fp16, var_1254_cast_fp16, var_1256_cast_fp16, var_1258_cast_fp16, var_1260_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1269_pad_type_0 = const()[name = tensor<string, []>("op_1269_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1269_strides_0 = const()[name = tensor<string, []>("op_1269_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1269_pad_0 = const()[name = tensor<string, []>("op_1269_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1269_dilations_0 = const()[name = tensor<string, []>("op_1269_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1269_groups_0 = const()[name = tensor<string, []>("op_1269_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80636416)))];
+            tensor<fp16, [768]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81816128)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1269_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1269_dilations_0, groups = var_1269_groups_0, pad = var_1269_pad_0, pad_type = var_1269_pad_type_0, strides = var_1269_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1269_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1269_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81817728)))];
+            tensor<fp16, [768]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81819328)))];
+            tensor<fp16, []> var_1279_to_fp16 = const()[name = tensor<string, []>("op_1279_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1279_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81820928)))];
+            tensor<fp16, [3072]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86539584)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1305_pad_type_0 = const()[name = tensor<string, []>("op_1305_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1305_strides_0 = const()[name = tensor<string, []>("op_1305_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1305_pad_0 = const()[name = tensor<string, []>("op_1305_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1305_dilations_0 = const()[name = tensor<string, []>("op_1305_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1305_groups_0 = const()[name = tensor<string, []>("op_1305_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86545792)))];
+            tensor<fp16, [768]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91264448)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1305_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1305_dilations_0, groups = var_1305_groups_0, pad = var_1305_pad_0, pad_type = var_1305_pad_type_0, strides = var_1305_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1305_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1305_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1314 = const()[name = tensor<string, []>("op_1314"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91266048)))];
+            tensor<fp16, [768]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91267648)))];
+            tensor<fp16, []> var_1330_to_fp16 = const()[name = tensor<string, []>("op_1330_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1330_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1365_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1365_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91269248)))];
+            tensor<fp16, [768]> var_1365_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1365_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92448960)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1365_cast_fp16 = conv(bias = var_1365_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1365_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92450560)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1363_pad_type_0 = const()[name = tensor<string, []>("op_1363_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1363_strides_0 = const()[name = tensor<string, []>("op_1363_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1363_pad_0 = const()[name = tensor<string, []>("op_1363_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1363_dilations_0 = const()[name = tensor<string, []>("op_1363_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1363_groups_0 = const()[name = tensor<string, []>("op_1363_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93630272)))];
+            tensor<fp16, [768]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94809984)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1363_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1363_dilations_0, groups = var_1363_groups_0, pad = var_1363_pad_0, pad_type = var_1363_pad_type_0, strides = var_1363_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [12]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1366_axis_0 = const()[name = tensor<string, []>("op_1366_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_11 = split(axis = var_1366_axis_0, split_sizes = tile_18, x = var_1365_cast_fp16)[name = tensor<string, []>("op_1366_cast_fp16")];
+            tensor<int32, [4]> var_1379_perm_0 = const()[name = tensor<string, []>("op_1379_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1380_axis_0 = const()[name = tensor<string, []>("op_1380_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1379_cast_fp16 = transpose(perm = var_1379_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_11 = split(axis = var_1380_axis_0, split_sizes = tile_19, x = var_1379_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
+            tensor<int32, [12]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1393_axis_0 = const()[name = tensor<string, []>("op_1393_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_11 = split(axis = var_1393_axis_0, split_sizes = tile_20, x = var_1363_cast_fp16)[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1380_cast_fp16_0, var_1366_cast_fp16_0))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1380_cast_fp16_1, var_1366_cast_fp16_1))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1380_cast_fp16_2, var_1366_cast_fp16_2))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1380_cast_fp16_3, var_1366_cast_fp16_3))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1380_cast_fp16_4, var_1366_cast_fp16_4))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1380_cast_fp16_5, var_1366_cast_fp16_5))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1380_cast_fp16_6, var_1366_cast_fp16_6))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1380_cast_fp16_7, var_1366_cast_fp16_7))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1380_cast_fp16_8, var_1366_cast_fp16_8))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1380_cast_fp16_9, var_1366_cast_fp16_9))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1380_cast_fp16_10, var_1366_cast_fp16_10))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1380_cast_fp16_11, var_1366_cast_fp16_11))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1430_cast_fp16 = softmax(axis = var_1314, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1430_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1431_cast_fp16 = softmax(axis = var_1314, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1431_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1432_cast_fp16 = softmax(axis = var_1314, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1433_cast_fp16 = softmax(axis = var_1314, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1434_cast_fp16 = softmax(axis = var_1314, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1435_cast_fp16 = softmax(axis = var_1314, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1436_cast_fp16 = softmax(axis = var_1314, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1437_cast_fp16 = softmax(axis = var_1314, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1438_cast_fp16 = softmax(axis = var_1314, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1439_cast_fp16 = softmax(axis = var_1314, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1440_cast_fp16 = softmax(axis = var_1314, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1441_cast_fp16 = softmax(axis = var_1314, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1441_cast_fp16")];
+            tensor<string, []> var_1443_equation_0 = const()[name = tensor<string, []>("op_1443_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1443_cast_fp16 = einsum(equation = var_1443_equation_0, values = (var_1393_cast_fp16_0, var_1430_cast_fp16))[name = tensor<string, []>("op_1443_cast_fp16")];
+            tensor<string, []> var_1445_equation_0 = const()[name = tensor<string, []>("op_1445_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1445_cast_fp16 = einsum(equation = var_1445_equation_0, values = (var_1393_cast_fp16_1, var_1431_cast_fp16))[name = tensor<string, []>("op_1445_cast_fp16")];
+            tensor<string, []> var_1447_equation_0 = const()[name = tensor<string, []>("op_1447_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1447_cast_fp16 = einsum(equation = var_1447_equation_0, values = (var_1393_cast_fp16_2, var_1432_cast_fp16))[name = tensor<string, []>("op_1447_cast_fp16")];
+            tensor<string, []> var_1449_equation_0 = const()[name = tensor<string, []>("op_1449_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1449_cast_fp16 = einsum(equation = var_1449_equation_0, values = (var_1393_cast_fp16_3, var_1433_cast_fp16))[name = tensor<string, []>("op_1449_cast_fp16")];
+            tensor<string, []> var_1451_equation_0 = const()[name = tensor<string, []>("op_1451_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1451_cast_fp16 = einsum(equation = var_1451_equation_0, values = (var_1393_cast_fp16_4, var_1434_cast_fp16))[name = tensor<string, []>("op_1451_cast_fp16")];
+            tensor<string, []> var_1453_equation_0 = const()[name = tensor<string, []>("op_1453_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1453_cast_fp16 = einsum(equation = var_1453_equation_0, values = (var_1393_cast_fp16_5, var_1435_cast_fp16))[name = tensor<string, []>("op_1453_cast_fp16")];
+            tensor<string, []> var_1455_equation_0 = const()[name = tensor<string, []>("op_1455_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1455_cast_fp16 = einsum(equation = var_1455_equation_0, values = (var_1393_cast_fp16_6, var_1436_cast_fp16))[name = tensor<string, []>("op_1455_cast_fp16")];
+            tensor<string, []> var_1457_equation_0 = const()[name = tensor<string, []>("op_1457_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1457_cast_fp16 = einsum(equation = var_1457_equation_0, values = (var_1393_cast_fp16_7, var_1437_cast_fp16))[name = tensor<string, []>("op_1457_cast_fp16")];
+            tensor<string, []> var_1459_equation_0 = const()[name = tensor<string, []>("op_1459_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1459_cast_fp16 = einsum(equation = var_1459_equation_0, values = (var_1393_cast_fp16_8, var_1438_cast_fp16))[name = tensor<string, []>("op_1459_cast_fp16")];
+            tensor<string, []> var_1461_equation_0 = const()[name = tensor<string, []>("op_1461_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1461_cast_fp16 = einsum(equation = var_1461_equation_0, values = (var_1393_cast_fp16_9, var_1439_cast_fp16))[name = tensor<string, []>("op_1461_cast_fp16")];
+            tensor<string, []> var_1463_equation_0 = const()[name = tensor<string, []>("op_1463_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1463_cast_fp16 = einsum(equation = var_1463_equation_0, values = (var_1393_cast_fp16_10, var_1440_cast_fp16))[name = tensor<string, []>("op_1463_cast_fp16")];
+            tensor<string, []> var_1465_equation_0 = const()[name = tensor<string, []>("op_1465_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1465_cast_fp16 = einsum(equation = var_1465_equation_0, values = (var_1393_cast_fp16_11, var_1441_cast_fp16))[name = tensor<string, []>("op_1465_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1314, interleave = input_65_interleave_0, values = (var_1443_cast_fp16, var_1445_cast_fp16, var_1447_cast_fp16, var_1449_cast_fp16, var_1451_cast_fp16, var_1453_cast_fp16, var_1455_cast_fp16, var_1457_cast_fp16, var_1459_cast_fp16, var_1461_cast_fp16, var_1463_cast_fp16, var_1465_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1474_pad_type_0 = const()[name = tensor<string, []>("op_1474_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1474_strides_0 = const()[name = tensor<string, []>("op_1474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1474_pad_0 = const()[name = tensor<string, []>("op_1474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1474_dilations_0 = const()[name = tensor<string, []>("op_1474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1474_groups_0 = const()[name = tensor<string, []>("op_1474_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94811584)))];
+            tensor<fp16, [768]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95991296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1474_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1474_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1474_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95992896)))];
+            tensor<fp16, [768]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95994496)))];
+            tensor<fp16, []> var_1484_to_fp16 = const()[name = tensor<string, []>("op_1484_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1484_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95996096)))];
+            tensor<fp16, [3072]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100714752)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1510_pad_type_0 = const()[name = tensor<string, []>("op_1510_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1510_strides_0 = const()[name = tensor<string, []>("op_1510_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1510_pad_0 = const()[name = tensor<string, []>("op_1510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1510_dilations_0 = const()[name = tensor<string, []>("op_1510_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1510_groups_0 = const()[name = tensor<string, []>("op_1510_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100720960)))];
+            tensor<fp16, [768]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105439616)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1510_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1510_dilations_0, groups = var_1510_groups_0, pad = var_1510_pad_0, pad_type = var_1510_pad_type_0, strides = var_1510_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1510_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1510_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_1519 = const()[name = tensor<string, []>("op_1519"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105441216)))];
+            tensor<fp16, [768]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105442816)))];
+            tensor<fp16, []> var_1535_to_fp16 = const()[name = tensor<string, []>("op_1535_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_1535_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1570_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1570_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105444416)))];
+            tensor<fp16, [768]> var_1570_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1570_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106624128)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1570_cast_fp16 = conv(bias = var_1570_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_1570_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1570_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106625728)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_1568_pad_type_0 = const()[name = tensor<string, []>("op_1568_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1568_strides_0 = const()[name = tensor<string, []>("op_1568_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1568_pad_0 = const()[name = tensor<string, []>("op_1568_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1568_dilations_0 = const()[name = tensor<string, []>("op_1568_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1568_groups_0 = const()[name = tensor<string, []>("op_1568_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107805440)))];
+            tensor<fp16, [768]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108985152)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1568_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1568_cast_fp16")];
+            tensor<int32, [12]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1571_axis_0 = const()[name = tensor<string, []>("op_1571_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_11 = split(axis = var_1571_axis_0, split_sizes = tile_21, x = var_1570_cast_fp16)[name = tensor<string, []>("op_1571_cast_fp16")];
+            tensor<int32, [4]> var_1584_perm_0 = const()[name = tensor<string, []>("op_1584_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1585_axis_0 = const()[name = tensor<string, []>("op_1585_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1584_cast_fp16 = transpose(perm = var_1584_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_11 = split(axis = var_1585_axis_0, split_sizes = tile_22, x = var_1584_cast_fp16)[name = tensor<string, []>("op_1585_cast_fp16")];
+            tensor<int32, [12]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1598_axis_0 = const()[name = tensor<string, []>("op_1598_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_11 = split(axis = var_1598_axis_0, split_sizes = tile_23, x = var_1568_cast_fp16)[name = tensor<string, []>("op_1598_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1585_cast_fp16_0, var_1571_cast_fp16_0))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1585_cast_fp16_1, var_1571_cast_fp16_1))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1585_cast_fp16_2, var_1571_cast_fp16_2))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1585_cast_fp16_3, var_1571_cast_fp16_3))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1585_cast_fp16_4, var_1571_cast_fp16_4))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1585_cast_fp16_5, var_1571_cast_fp16_5))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1585_cast_fp16_6, var_1571_cast_fp16_6))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1585_cast_fp16_7, var_1571_cast_fp16_7))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1585_cast_fp16_8, var_1571_cast_fp16_8))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1585_cast_fp16_9, var_1571_cast_fp16_9))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1585_cast_fp16_10, var_1571_cast_fp16_10))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1585_cast_fp16_11, var_1571_cast_fp16_11))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1635_cast_fp16 = softmax(axis = var_1519, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1636_cast_fp16 = softmax(axis = var_1519, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1637_cast_fp16 = softmax(axis = var_1519, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1638_cast_fp16 = softmax(axis = var_1519, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1639_cast_fp16 = softmax(axis = var_1519, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1640_cast_fp16 = softmax(axis = var_1519, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1641_cast_fp16 = softmax(axis = var_1519, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1642_cast_fp16 = softmax(axis = var_1519, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1643_cast_fp16 = softmax(axis = var_1519, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1644_cast_fp16 = softmax(axis = var_1519, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1644_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1645_cast_fp16 = softmax(axis = var_1519, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1645_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1646_cast_fp16 = softmax(axis = var_1519, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1598_cast_fp16_0, var_1635_cast_fp16))[name = tensor<string, []>("op_1648_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1598_cast_fp16_1, var_1636_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1598_cast_fp16_2, var_1637_cast_fp16))[name = tensor<string, []>("op_1652_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1598_cast_fp16_3, var_1638_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1598_cast_fp16_4, var_1639_cast_fp16))[name = tensor<string, []>("op_1656_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1598_cast_fp16_5, var_1640_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1598_cast_fp16_6, var_1641_cast_fp16))[name = tensor<string, []>("op_1660_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1598_cast_fp16_7, var_1642_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1598_cast_fp16_8, var_1643_cast_fp16))[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1598_cast_fp16_9, var_1644_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1598_cast_fp16_10, var_1645_cast_fp16))[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1598_cast_fp16_11, var_1646_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_75_cast_fp16 = concat(axis = var_1519, interleave = input_75_interleave_0, values = (var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16, var_1668_cast_fp16, var_1670_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_1679_pad_type_0 = const()[name = tensor<string, []>("op_1679_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1679_strides_0 = const()[name = tensor<string, []>("op_1679_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1679_pad_0 = const()[name = tensor<string, []>("op_1679_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1679_dilations_0 = const()[name = tensor<string, []>("op_1679_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1679_groups_0 = const()[name = tensor<string, []>("op_1679_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108986752)))];
+            tensor<fp16, [768]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110166464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1679_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_1679_dilations_0, groups = var_1679_groups_0, pad = var_1679_pad_0, pad_type = var_1679_pad_type_0, strides = var_1679_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_1679_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_1679_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110168064)))];
+            tensor<fp16, [768]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110169664)))];
+            tensor<fp16, []> var_1689_to_fp16 = const()[name = tensor<string, []>("op_1689_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_1689_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110171264)))];
+            tensor<fp16, [3072]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114889920)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_1715_pad_type_0 = const()[name = tensor<string, []>("op_1715_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1715_strides_0 = const()[name = tensor<string, []>("op_1715_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1715_pad_0 = const()[name = tensor<string, []>("op_1715_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1715_dilations_0 = const()[name = tensor<string, []>("op_1715_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1715_groups_0 = const()[name = tensor<string, []>("op_1715_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114896128)))];
+            tensor<fp16, [768]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119614784)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1715_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_1715_dilations_0, groups = var_1715_groups_0, pad = var_1715_pad_0, pad_type = var_1715_pad_type_0, strides = var_1715_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_1715_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_1715_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_1724 = const()[name = tensor<string, []>("op_1724"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119616384)))];
+            tensor<fp16, [768]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119617984)))];
+            tensor<fp16, []> var_1740_to_fp16 = const()[name = tensor<string, []>("op_1740_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_1740_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1775_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1775_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119619584)))];
+            tensor<fp16, [768]> var_1775_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1775_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120799296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1775_cast_fp16 = conv(bias = var_1775_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_1775_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_1775_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120800896)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_1773_pad_type_0 = const()[name = tensor<string, []>("op_1773_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1773_strides_0 = const()[name = tensor<string, []>("op_1773_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1773_pad_0 = const()[name = tensor<string, []>("op_1773_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1773_dilations_0 = const()[name = tensor<string, []>("op_1773_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1773_groups_0 = const()[name = tensor<string, []>("op_1773_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121980608)))];
+            tensor<fp16, [768]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123160320)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1773_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_1773_dilations_0, groups = var_1773_groups_0, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1773_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_1773_cast_fp16")];
+            tensor<int32, [12]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1776_axis_0 = const()[name = tensor<string, []>("op_1776_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_11 = split(axis = var_1776_axis_0, split_sizes = tile_24, x = var_1775_cast_fp16)[name = tensor<string, []>("op_1776_cast_fp16")];
+            tensor<int32, [4]> var_1789_perm_0 = const()[name = tensor<string, []>("op_1789_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1790_axis_0 = const()[name = tensor<string, []>("op_1790_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1789_cast_fp16 = transpose(perm = var_1789_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_11 = split(axis = var_1790_axis_0, split_sizes = tile_25, x = var_1789_cast_fp16)[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<int32, [12]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1803_axis_0 = const()[name = tensor<string, []>("op_1803_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_11 = split(axis = var_1803_axis_0, split_sizes = tile_26, x = var_1773_cast_fp16)[name = tensor<string, []>("op_1803_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1790_cast_fp16_0, var_1776_cast_fp16_0))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1790_cast_fp16_1, var_1776_cast_fp16_1))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1790_cast_fp16_2, var_1776_cast_fp16_2))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1790_cast_fp16_3, var_1776_cast_fp16_3))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1790_cast_fp16_4, var_1776_cast_fp16_4))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1790_cast_fp16_5, var_1776_cast_fp16_5))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1790_cast_fp16_6, var_1776_cast_fp16_6))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1790_cast_fp16_7, var_1776_cast_fp16_7))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1790_cast_fp16_8, var_1776_cast_fp16_8))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1790_cast_fp16_9, var_1776_cast_fp16_9))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1790_cast_fp16_10, var_1776_cast_fp16_10))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1790_cast_fp16_11, var_1776_cast_fp16_11))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1840_cast_fp16 = softmax(axis = var_1724, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1840_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1841_cast_fp16 = softmax(axis = var_1724, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1841_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1842_cast_fp16 = softmax(axis = var_1724, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1842_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1843_cast_fp16 = softmax(axis = var_1724, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1843_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1844_cast_fp16 = softmax(axis = var_1724, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1844_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1845_cast_fp16 = softmax(axis = var_1724, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1845_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1846_cast_fp16 = softmax(axis = var_1724, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1846_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1847_cast_fp16 = softmax(axis = var_1724, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1848_cast_fp16 = softmax(axis = var_1724, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1848_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1849_cast_fp16 = softmax(axis = var_1724, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1849_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1850_cast_fp16 = softmax(axis = var_1724, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1850_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1851_cast_fp16 = softmax(axis = var_1724, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1851_cast_fp16")];
+            tensor<string, []> var_1853_equation_0 = const()[name = tensor<string, []>("op_1853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1853_cast_fp16 = einsum(equation = var_1853_equation_0, values = (var_1803_cast_fp16_0, var_1840_cast_fp16))[name = tensor<string, []>("op_1853_cast_fp16")];
+            tensor<string, []> var_1855_equation_0 = const()[name = tensor<string, []>("op_1855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1855_cast_fp16 = einsum(equation = var_1855_equation_0, values = (var_1803_cast_fp16_1, var_1841_cast_fp16))[name = tensor<string, []>("op_1855_cast_fp16")];
+            tensor<string, []> var_1857_equation_0 = const()[name = tensor<string, []>("op_1857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1857_cast_fp16 = einsum(equation = var_1857_equation_0, values = (var_1803_cast_fp16_2, var_1842_cast_fp16))[name = tensor<string, []>("op_1857_cast_fp16")];
+            tensor<string, []> var_1859_equation_0 = const()[name = tensor<string, []>("op_1859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1859_cast_fp16 = einsum(equation = var_1859_equation_0, values = (var_1803_cast_fp16_3, var_1843_cast_fp16))[name = tensor<string, []>("op_1859_cast_fp16")];
+            tensor<string, []> var_1861_equation_0 = const()[name = tensor<string, []>("op_1861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1861_cast_fp16 = einsum(equation = var_1861_equation_0, values = (var_1803_cast_fp16_4, var_1844_cast_fp16))[name = tensor<string, []>("op_1861_cast_fp16")];
+            tensor<string, []> var_1863_equation_0 = const()[name = tensor<string, []>("op_1863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1863_cast_fp16 = einsum(equation = var_1863_equation_0, values = (var_1803_cast_fp16_5, var_1845_cast_fp16))[name = tensor<string, []>("op_1863_cast_fp16")];
+            tensor<string, []> var_1865_equation_0 = const()[name = tensor<string, []>("op_1865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1865_cast_fp16 = einsum(equation = var_1865_equation_0, values = (var_1803_cast_fp16_6, var_1846_cast_fp16))[name = tensor<string, []>("op_1865_cast_fp16")];
+            tensor<string, []> var_1867_equation_0 = const()[name = tensor<string, []>("op_1867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1867_cast_fp16 = einsum(equation = var_1867_equation_0, values = (var_1803_cast_fp16_7, var_1847_cast_fp16))[name = tensor<string, []>("op_1867_cast_fp16")];
+            tensor<string, []> var_1869_equation_0 = const()[name = tensor<string, []>("op_1869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1869_cast_fp16 = einsum(equation = var_1869_equation_0, values = (var_1803_cast_fp16_8, var_1848_cast_fp16))[name = tensor<string, []>("op_1869_cast_fp16")];
+            tensor<string, []> var_1871_equation_0 = const()[name = tensor<string, []>("op_1871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1871_cast_fp16 = einsum(equation = var_1871_equation_0, values = (var_1803_cast_fp16_9, var_1849_cast_fp16))[name = tensor<string, []>("op_1871_cast_fp16")];
+            tensor<string, []> var_1873_equation_0 = const()[name = tensor<string, []>("op_1873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1873_cast_fp16 = einsum(equation = var_1873_equation_0, values = (var_1803_cast_fp16_10, var_1850_cast_fp16))[name = tensor<string, []>("op_1873_cast_fp16")];
+            tensor<string, []> var_1875_equation_0 = const()[name = tensor<string, []>("op_1875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1875_cast_fp16 = einsum(equation = var_1875_equation_0, values = (var_1803_cast_fp16_11, var_1851_cast_fp16))[name = tensor<string, []>("op_1875_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_85_cast_fp16 = concat(axis = var_1724, interleave = input_85_interleave_0, values = (var_1853_cast_fp16, var_1855_cast_fp16, var_1857_cast_fp16, var_1859_cast_fp16, var_1861_cast_fp16, var_1863_cast_fp16, var_1865_cast_fp16, var_1867_cast_fp16, var_1869_cast_fp16, var_1871_cast_fp16, var_1873_cast_fp16, var_1875_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_1884_pad_type_0 = const()[name = tensor<string, []>("op_1884_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1884_strides_0 = const()[name = tensor<string, []>("op_1884_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1884_pad_0 = const()[name = tensor<string, []>("op_1884_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1884_dilations_0 = const()[name = tensor<string, []>("op_1884_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1884_groups_0 = const()[name = tensor<string, []>("op_1884_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123161920)))];
+            tensor<fp16, [768]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124341632)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1884_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_1884_dilations_0, groups = var_1884_groups_0, pad = var_1884_pad_0, pad_type = var_1884_pad_type_0, strides = var_1884_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_1884_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_1884_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124343232)))];
+            tensor<fp16, [768]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124344832)))];
+            tensor<fp16, []> var_1894_to_fp16 = const()[name = tensor<string, []>("op_1894_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_1894_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124346432)))];
+            tensor<fp16, [3072]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129065088)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_1920_pad_type_0 = const()[name = tensor<string, []>("op_1920_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1920_strides_0 = const()[name = tensor<string, []>("op_1920_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1920_pad_0 = const()[name = tensor<string, []>("op_1920_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1920_dilations_0 = const()[name = tensor<string, []>("op_1920_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1920_groups_0 = const()[name = tensor<string, []>("op_1920_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129071296)))];
+            tensor<fp16, [768]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133789952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1920_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_1920_dilations_0, groups = var_1920_groups_0, pad = var_1920_pad_0, pad_type = var_1920_pad_type_0, strides = var_1920_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_1920_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_1920_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_1929 = const()[name = tensor<string, []>("op_1929"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133791552)))];
+            tensor<fp16, [768]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133793152)))];
+            tensor<fp16, []> var_1945_to_fp16 = const()[name = tensor<string, []>("op_1945_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_1945_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1980_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1980_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133794752)))];
+            tensor<fp16, [768]> var_1980_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1980_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134974464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1980_cast_fp16 = conv(bias = var_1980_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_1980_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_1980_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134976064)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_1978_pad_type_0 = const()[name = tensor<string, []>("op_1978_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1978_strides_0 = const()[name = tensor<string, []>("op_1978_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1978_pad_0 = const()[name = tensor<string, []>("op_1978_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1978_dilations_0 = const()[name = tensor<string, []>("op_1978_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1978_groups_0 = const()[name = tensor<string, []>("op_1978_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(136155776)))];
+            tensor<fp16, [768]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137335488)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1978_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_1978_dilations_0, groups = var_1978_groups_0, pad = var_1978_pad_0, pad_type = var_1978_pad_type_0, strides = var_1978_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<int32, [12]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1981_axis_0 = const()[name = tensor<string, []>("op_1981_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_11 = split(axis = var_1981_axis_0, split_sizes = tile_27, x = var_1980_cast_fp16)[name = tensor<string, []>("op_1981_cast_fp16")];
+            tensor<int32, [4]> var_1994_perm_0 = const()[name = tensor<string, []>("op_1994_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1995_axis_0 = const()[name = tensor<string, []>("op_1995_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1994_cast_fp16 = transpose(perm = var_1994_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_11 = split(axis = var_1995_axis_0, split_sizes = tile_28, x = var_1994_cast_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
+            tensor<int32, [12]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2008_axis_0 = const()[name = tensor<string, []>("op_2008_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_11 = split(axis = var_2008_axis_0, split_sizes = tile_29, x = var_1978_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1995_cast_fp16_0, var_1981_cast_fp16_0))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1995_cast_fp16_1, var_1981_cast_fp16_1))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1995_cast_fp16_2, var_1981_cast_fp16_2))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1995_cast_fp16_3, var_1981_cast_fp16_3))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1995_cast_fp16_4, var_1981_cast_fp16_4))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1995_cast_fp16_5, var_1981_cast_fp16_5))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1995_cast_fp16_6, var_1981_cast_fp16_6))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1995_cast_fp16_7, var_1981_cast_fp16_7))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1995_cast_fp16_8, var_1981_cast_fp16_8))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1995_cast_fp16_9, var_1981_cast_fp16_9))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1995_cast_fp16_10, var_1981_cast_fp16_10))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1995_cast_fp16_11, var_1981_cast_fp16_11))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2045_cast_fp16 = softmax(axis = var_1929, x = aw_217_cast_fp16)[name = tensor<string, []>("op_2045_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2046_cast_fp16 = softmax(axis = var_1929, x = aw_219_cast_fp16)[name = tensor<string, []>("op_2046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2047_cast_fp16 = softmax(axis = var_1929, x = aw_221_cast_fp16)[name = tensor<string, []>("op_2047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2048_cast_fp16 = softmax(axis = var_1929, x = aw_223_cast_fp16)[name = tensor<string, []>("op_2048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2049_cast_fp16 = softmax(axis = var_1929, x = aw_225_cast_fp16)[name = tensor<string, []>("op_2049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2050_cast_fp16 = softmax(axis = var_1929, x = aw_227_cast_fp16)[name = tensor<string, []>("op_2050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2051_cast_fp16 = softmax(axis = var_1929, x = aw_229_cast_fp16)[name = tensor<string, []>("op_2051_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2052_cast_fp16 = softmax(axis = var_1929, x = aw_231_cast_fp16)[name = tensor<string, []>("op_2052_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2053_cast_fp16 = softmax(axis = var_1929, x = aw_233_cast_fp16)[name = tensor<string, []>("op_2053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2054_cast_fp16 = softmax(axis = var_1929, x = aw_235_cast_fp16)[name = tensor<string, []>("op_2054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2055_cast_fp16 = softmax(axis = var_1929, x = aw_237_cast_fp16)[name = tensor<string, []>("op_2055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2056_cast_fp16 = softmax(axis = var_1929, x = aw_239_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<string, []> var_2058_equation_0 = const()[name = tensor<string, []>("op_2058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2058_cast_fp16 = einsum(equation = var_2058_equation_0, values = (var_2008_cast_fp16_0, var_2045_cast_fp16))[name = tensor<string, []>("op_2058_cast_fp16")];
+            tensor<string, []> var_2060_equation_0 = const()[name = tensor<string, []>("op_2060_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2060_cast_fp16 = einsum(equation = var_2060_equation_0, values = (var_2008_cast_fp16_1, var_2046_cast_fp16))[name = tensor<string, []>("op_2060_cast_fp16")];
+            tensor<string, []> var_2062_equation_0 = const()[name = tensor<string, []>("op_2062_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2062_cast_fp16 = einsum(equation = var_2062_equation_0, values = (var_2008_cast_fp16_2, var_2047_cast_fp16))[name = tensor<string, []>("op_2062_cast_fp16")];
+            tensor<string, []> var_2064_equation_0 = const()[name = tensor<string, []>("op_2064_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2064_cast_fp16 = einsum(equation = var_2064_equation_0, values = (var_2008_cast_fp16_3, var_2048_cast_fp16))[name = tensor<string, []>("op_2064_cast_fp16")];
+            tensor<string, []> var_2066_equation_0 = const()[name = tensor<string, []>("op_2066_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2066_cast_fp16 = einsum(equation = var_2066_equation_0, values = (var_2008_cast_fp16_4, var_2049_cast_fp16))[name = tensor<string, []>("op_2066_cast_fp16")];
+            tensor<string, []> var_2068_equation_0 = const()[name = tensor<string, []>("op_2068_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2068_cast_fp16 = einsum(equation = var_2068_equation_0, values = (var_2008_cast_fp16_5, var_2050_cast_fp16))[name = tensor<string, []>("op_2068_cast_fp16")];
+            tensor<string, []> var_2070_equation_0 = const()[name = tensor<string, []>("op_2070_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2070_cast_fp16 = einsum(equation = var_2070_equation_0, values = (var_2008_cast_fp16_6, var_2051_cast_fp16))[name = tensor<string, []>("op_2070_cast_fp16")];
+            tensor<string, []> var_2072_equation_0 = const()[name = tensor<string, []>("op_2072_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2072_cast_fp16 = einsum(equation = var_2072_equation_0, values = (var_2008_cast_fp16_7, var_2052_cast_fp16))[name = tensor<string, []>("op_2072_cast_fp16")];
+            tensor<string, []> var_2074_equation_0 = const()[name = tensor<string, []>("op_2074_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2074_cast_fp16 = einsum(equation = var_2074_equation_0, values = (var_2008_cast_fp16_8, var_2053_cast_fp16))[name = tensor<string, []>("op_2074_cast_fp16")];
+            tensor<string, []> var_2076_equation_0 = const()[name = tensor<string, []>("op_2076_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2076_cast_fp16 = einsum(equation = var_2076_equation_0, values = (var_2008_cast_fp16_9, var_2054_cast_fp16))[name = tensor<string, []>("op_2076_cast_fp16")];
+            tensor<string, []> var_2078_equation_0 = const()[name = tensor<string, []>("op_2078_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2078_cast_fp16 = einsum(equation = var_2078_equation_0, values = (var_2008_cast_fp16_10, var_2055_cast_fp16))[name = tensor<string, []>("op_2078_cast_fp16")];
+            tensor<string, []> var_2080_equation_0 = const()[name = tensor<string, []>("op_2080_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2080_cast_fp16 = einsum(equation = var_2080_equation_0, values = (var_2008_cast_fp16_11, var_2056_cast_fp16))[name = tensor<string, []>("op_2080_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_95_cast_fp16 = concat(axis = var_1929, interleave = input_95_interleave_0, values = (var_2058_cast_fp16, var_2060_cast_fp16, var_2062_cast_fp16, var_2064_cast_fp16, var_2066_cast_fp16, var_2068_cast_fp16, var_2070_cast_fp16, var_2072_cast_fp16, var_2074_cast_fp16, var_2076_cast_fp16, var_2078_cast_fp16, var_2080_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2089_pad_type_0 = const()[name = tensor<string, []>("op_2089_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2089_strides_0 = const()[name = tensor<string, []>("op_2089_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2089_pad_0 = const()[name = tensor<string, []>("op_2089_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2089_dilations_0 = const()[name = tensor<string, []>("op_2089_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2089_groups_0 = const()[name = tensor<string, []>("op_2089_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137337088)))];
+            tensor<fp16, [768]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138516800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2089_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2089_dilations_0, groups = var_2089_groups_0, pad = var_2089_pad_0, pad_type = var_2089_pad_type_0, strides = var_2089_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2089_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2089_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138518400)))];
+            tensor<fp16, [768]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138520000)))];
+            tensor<fp16, []> var_2099_to_fp16 = const()[name = tensor<string, []>("op_2099_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2099_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138521600)))];
+            tensor<fp16, [3072]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143240256)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2125_pad_type_0 = const()[name = tensor<string, []>("op_2125_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2125_strides_0 = const()[name = tensor<string, []>("op_2125_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2125_pad_0 = const()[name = tensor<string, []>("op_2125_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2125_dilations_0 = const()[name = tensor<string, []>("op_2125_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2125_groups_0 = const()[name = tensor<string, []>("op_2125_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143246464)))];
+            tensor<fp16, [768]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147965120)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2125_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2125_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2125_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2134 = const()[name = tensor<string, []>("op_2134"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147966720)))];
+            tensor<fp16, [768]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147968320)))];
+            tensor<fp16, []> var_2150_to_fp16 = const()[name = tensor<string, []>("op_2150_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2150_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_2185_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2185_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147969920)))];
+            tensor<fp16, [768]> var_2185_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2185_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149149632)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2185_cast_fp16 = conv(bias = var_2185_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2185_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2185_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149151232)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2183_pad_type_0 = const()[name = tensor<string, []>("op_2183_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2183_strides_0 = const()[name = tensor<string, []>("op_2183_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2183_pad_0 = const()[name = tensor<string, []>("op_2183_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2183_dilations_0 = const()[name = tensor<string, []>("op_2183_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2183_groups_0 = const()[name = tensor<string, []>("op_2183_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(150330944)))];
+            tensor<fp16, [768]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151510656)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2183_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2183_dilations_0, groups = var_2183_groups_0, pad = var_2183_pad_0, pad_type = var_2183_pad_type_0, strides = var_2183_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2183_cast_fp16")];
+            tensor<int32, [12]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2186_axis_0 = const()[name = tensor<string, []>("op_2186_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_11 = split(axis = var_2186_axis_0, split_sizes = tile_30, x = var_2185_cast_fp16)[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<int32, [4]> var_2199_perm_0 = const()[name = tensor<string, []>("op_2199_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2200_axis_0 = const()[name = tensor<string, []>("op_2200_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_2199_cast_fp16 = transpose(perm = var_2199_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_11 = split(axis = var_2200_axis_0, split_sizes = tile_31, x = var_2199_cast_fp16)[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<int32, [12]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2213_axis_0 = const()[name = tensor<string, []>("op_2213_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_11 = split(axis = var_2213_axis_0, split_sizes = tile_32, x = var_2183_cast_fp16)[name = tensor<string, []>("op_2213_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_2200_cast_fp16_0, var_2186_cast_fp16_0))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_2200_cast_fp16_1, var_2186_cast_fp16_1))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_2200_cast_fp16_2, var_2186_cast_fp16_2))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_2200_cast_fp16_3, var_2186_cast_fp16_3))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_2200_cast_fp16_4, var_2186_cast_fp16_4))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_2200_cast_fp16_5, var_2186_cast_fp16_5))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_2200_cast_fp16_6, var_2186_cast_fp16_6))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_2200_cast_fp16_7, var_2186_cast_fp16_7))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_2200_cast_fp16_8, var_2186_cast_fp16_8))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_2200_cast_fp16_9, var_2186_cast_fp16_9))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_2200_cast_fp16_10, var_2186_cast_fp16_10))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_2200_cast_fp16_11, var_2186_cast_fp16_11))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2250_cast_fp16 = softmax(axis = var_2134, x = aw_241_cast_fp16)[name = tensor<string, []>("op_2250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2251_cast_fp16 = softmax(axis = var_2134, x = aw_243_cast_fp16)[name = tensor<string, []>("op_2251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2252_cast_fp16 = softmax(axis = var_2134, x = aw_245_cast_fp16)[name = tensor<string, []>("op_2252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2253_cast_fp16 = softmax(axis = var_2134, x = aw_247_cast_fp16)[name = tensor<string, []>("op_2253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2254_cast_fp16 = softmax(axis = var_2134, x = aw_249_cast_fp16)[name = tensor<string, []>("op_2254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2255_cast_fp16 = softmax(axis = var_2134, x = aw_251_cast_fp16)[name = tensor<string, []>("op_2255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2256_cast_fp16 = softmax(axis = var_2134, x = aw_253_cast_fp16)[name = tensor<string, []>("op_2256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2257_cast_fp16 = softmax(axis = var_2134, x = aw_255_cast_fp16)[name = tensor<string, []>("op_2257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2258_cast_fp16 = softmax(axis = var_2134, x = aw_257_cast_fp16)[name = tensor<string, []>("op_2258_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2259_cast_fp16 = softmax(axis = var_2134, x = aw_259_cast_fp16)[name = tensor<string, []>("op_2259_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2260_cast_fp16 = softmax(axis = var_2134, x = aw_261_cast_fp16)[name = tensor<string, []>("op_2260_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2261_cast_fp16 = softmax(axis = var_2134, x = aw_263_cast_fp16)[name = tensor<string, []>("op_2261_cast_fp16")];
+            tensor<string, []> var_2263_equation_0 = const()[name = tensor<string, []>("op_2263_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2263_cast_fp16 = einsum(equation = var_2263_equation_0, values = (var_2213_cast_fp16_0, var_2250_cast_fp16))[name = tensor<string, []>("op_2263_cast_fp16")];
+            tensor<string, []> var_2265_equation_0 = const()[name = tensor<string, []>("op_2265_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2265_cast_fp16 = einsum(equation = var_2265_equation_0, values = (var_2213_cast_fp16_1, var_2251_cast_fp16))[name = tensor<string, []>("op_2265_cast_fp16")];
+            tensor<string, []> var_2267_equation_0 = const()[name = tensor<string, []>("op_2267_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2267_cast_fp16 = einsum(equation = var_2267_equation_0, values = (var_2213_cast_fp16_2, var_2252_cast_fp16))[name = tensor<string, []>("op_2267_cast_fp16")];
+            tensor<string, []> var_2269_equation_0 = const()[name = tensor<string, []>("op_2269_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2269_cast_fp16 = einsum(equation = var_2269_equation_0, values = (var_2213_cast_fp16_3, var_2253_cast_fp16))[name = tensor<string, []>("op_2269_cast_fp16")];
+            tensor<string, []> var_2271_equation_0 = const()[name = tensor<string, []>("op_2271_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2271_cast_fp16 = einsum(equation = var_2271_equation_0, values = (var_2213_cast_fp16_4, var_2254_cast_fp16))[name = tensor<string, []>("op_2271_cast_fp16")];
+            tensor<string, []> var_2273_equation_0 = const()[name = tensor<string, []>("op_2273_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2273_cast_fp16 = einsum(equation = var_2273_equation_0, values = (var_2213_cast_fp16_5, var_2255_cast_fp16))[name = tensor<string, []>("op_2273_cast_fp16")];
+            tensor<string, []> var_2275_equation_0 = const()[name = tensor<string, []>("op_2275_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2275_cast_fp16 = einsum(equation = var_2275_equation_0, values = (var_2213_cast_fp16_6, var_2256_cast_fp16))[name = tensor<string, []>("op_2275_cast_fp16")];
+            tensor<string, []> var_2277_equation_0 = const()[name = tensor<string, []>("op_2277_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2277_cast_fp16 = einsum(equation = var_2277_equation_0, values = (var_2213_cast_fp16_7, var_2257_cast_fp16))[name = tensor<string, []>("op_2277_cast_fp16")];
+            tensor<string, []> var_2279_equation_0 = const()[name = tensor<string, []>("op_2279_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2279_cast_fp16 = einsum(equation = var_2279_equation_0, values = (var_2213_cast_fp16_8, var_2258_cast_fp16))[name = tensor<string, []>("op_2279_cast_fp16")];
+            tensor<string, []> var_2281_equation_0 = const()[name = tensor<string, []>("op_2281_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2281_cast_fp16 = einsum(equation = var_2281_equation_0, values = (var_2213_cast_fp16_9, var_2259_cast_fp16))[name = tensor<string, []>("op_2281_cast_fp16")];
+            tensor<string, []> var_2283_equation_0 = const()[name = tensor<string, []>("op_2283_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2283_cast_fp16 = einsum(equation = var_2283_equation_0, values = (var_2213_cast_fp16_10, var_2260_cast_fp16))[name = tensor<string, []>("op_2283_cast_fp16")];
+            tensor<string, []> var_2285_equation_0 = const()[name = tensor<string, []>("op_2285_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2285_cast_fp16 = einsum(equation = var_2285_equation_0, values = (var_2213_cast_fp16_11, var_2261_cast_fp16))[name = tensor<string, []>("op_2285_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2134, interleave = input_105_interleave_0, values = (var_2263_cast_fp16, var_2265_cast_fp16, var_2267_cast_fp16, var_2269_cast_fp16, var_2271_cast_fp16, var_2273_cast_fp16, var_2275_cast_fp16, var_2277_cast_fp16, var_2279_cast_fp16, var_2281_cast_fp16, var_2283_cast_fp16, var_2285_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_2294_pad_type_0 = const()[name = tensor<string, []>("op_2294_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2294_strides_0 = const()[name = tensor<string, []>("op_2294_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2294_pad_0 = const()[name = tensor<string, []>("op_2294_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2294_dilations_0 = const()[name = tensor<string, []>("op_2294_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2294_groups_0 = const()[name = tensor<string, []>("op_2294_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151512256)))];
+            tensor<fp16, [768]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152691968)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2294_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_2294_dilations_0, groups = var_2294_groups_0, pad = var_2294_pad_0, pad_type = var_2294_pad_type_0, strides = var_2294_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_2294_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_2294_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152693568)))];
+            tensor<fp16, [768]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152695168)))];
+            tensor<fp16, []> var_2304_to_fp16 = const()[name = tensor<string, []>("op_2304_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_2304_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152696768)))];
+            tensor<fp16, [3072]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157415424)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_2330_pad_type_0 = const()[name = tensor<string, []>("op_2330_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2330_strides_0 = const()[name = tensor<string, []>("op_2330_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2330_pad_0 = const()[name = tensor<string, []>("op_2330_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2330_dilations_0 = const()[name = tensor<string, []>("op_2330_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2330_groups_0 = const()[name = tensor<string, []>("op_2330_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157421632)))];
+            tensor<fp16, [768]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162140288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2330_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_2330_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_2330_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_2339 = const()[name = tensor<string, []>("op_2339"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162141888)))];
+            tensor<fp16, [768]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162143488)))];
+            tensor<fp16, []> var_2355_to_fp16 = const()[name = tensor<string, []>("op_2355_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_2355_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_2390_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2390_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162145088)))];
+            tensor<fp16, [768]> var_2390_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2390_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163324800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2390_cast_fp16 = conv(bias = var_2390_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_2390_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2390_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163326400)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_2388_pad_type_0 = const()[name = tensor<string, []>("op_2388_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2388_strides_0 = const()[name = tensor<string, []>("op_2388_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2388_pad_0 = const()[name = tensor<string, []>("op_2388_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2388_dilations_0 = const()[name = tensor<string, []>("op_2388_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2388_groups_0 = const()[name = tensor<string, []>("op_2388_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(164506112)))];
+            tensor<fp16, [768]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165685824)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2388_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_2388_dilations_0, groups = var_2388_groups_0, pad = var_2388_pad_0, pad_type = var_2388_pad_type_0, strides = var_2388_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2388_cast_fp16")];
+            tensor<int32, [12]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2391_axis_0 = const()[name = tensor<string, []>("op_2391_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_11 = split(axis = var_2391_axis_0, split_sizes = tile_33, x = var_2390_cast_fp16)[name = tensor<string, []>("op_2391_cast_fp16")];
+            tensor<int32, [4]> var_2404_perm_0 = const()[name = tensor<string, []>("op_2404_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2405_axis_0 = const()[name = tensor<string, []>("op_2405_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_2404_cast_fp16 = transpose(perm = var_2404_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_11 = split(axis = var_2405_axis_0, split_sizes = tile_34, x = var_2404_cast_fp16)[name = tensor<string, []>("op_2405_cast_fp16")];
+            tensor<int32, [12]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2418_axis_0 = const()[name = tensor<string, []>("op_2418_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_11 = split(axis = var_2418_axis_0, split_sizes = tile_35, x = var_2388_cast_fp16)[name = tensor<string, []>("op_2418_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_2405_cast_fp16_0, var_2391_cast_fp16_0))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_2405_cast_fp16_1, var_2391_cast_fp16_1))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_2405_cast_fp16_2, var_2391_cast_fp16_2))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_2405_cast_fp16_3, var_2391_cast_fp16_3))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_2405_cast_fp16_4, var_2391_cast_fp16_4))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_2405_cast_fp16_5, var_2391_cast_fp16_5))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_2405_cast_fp16_6, var_2391_cast_fp16_6))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_2405_cast_fp16_7, var_2391_cast_fp16_7))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2405_cast_fp16_8, var_2391_cast_fp16_8))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2405_cast_fp16_9, var_2391_cast_fp16_9))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2405_cast_fp16_10, var_2391_cast_fp16_10))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_2405_cast_fp16_11, var_2391_cast_fp16_11))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2455_cast_fp16 = softmax(axis = var_2339, x = aw_265_cast_fp16)[name = tensor<string, []>("op_2455_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2456_cast_fp16 = softmax(axis = var_2339, x = aw_267_cast_fp16)[name = tensor<string, []>("op_2456_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2457_cast_fp16 = softmax(axis = var_2339, x = aw_269_cast_fp16)[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2458_cast_fp16 = softmax(axis = var_2339, x = aw_271_cast_fp16)[name = tensor<string, []>("op_2458_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2459_cast_fp16 = softmax(axis = var_2339, x = aw_273_cast_fp16)[name = tensor<string, []>("op_2459_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2460_cast_fp16 = softmax(axis = var_2339, x = aw_275_cast_fp16)[name = tensor<string, []>("op_2460_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2461_cast_fp16 = softmax(axis = var_2339, x = aw_277_cast_fp16)[name = tensor<string, []>("op_2461_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2462_cast_fp16 = softmax(axis = var_2339, x = aw_279_cast_fp16)[name = tensor<string, []>("op_2462_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2463_cast_fp16 = softmax(axis = var_2339, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2463_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2464_cast_fp16 = softmax(axis = var_2339, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2464_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2465_cast_fp16 = softmax(axis = var_2339, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2465_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2466_cast_fp16 = softmax(axis = var_2339, x = aw_cast_fp16)[name = tensor<string, []>("op_2466_cast_fp16")];
+            tensor<string, []> var_2468_equation_0 = const()[name = tensor<string, []>("op_2468_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2418_cast_fp16_0, var_2455_cast_fp16))[name = tensor<string, []>("op_2468_cast_fp16")];
+            tensor<string, []> var_2470_equation_0 = const()[name = tensor<string, []>("op_2470_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2418_cast_fp16_1, var_2456_cast_fp16))[name = tensor<string, []>("op_2470_cast_fp16")];
+            tensor<string, []> var_2472_equation_0 = const()[name = tensor<string, []>("op_2472_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2418_cast_fp16_2, var_2457_cast_fp16))[name = tensor<string, []>("op_2472_cast_fp16")];
+            tensor<string, []> var_2474_equation_0 = const()[name = tensor<string, []>("op_2474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2418_cast_fp16_3, var_2458_cast_fp16))[name = tensor<string, []>("op_2474_cast_fp16")];
+            tensor<string, []> var_2476_equation_0 = const()[name = tensor<string, []>("op_2476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2418_cast_fp16_4, var_2459_cast_fp16))[name = tensor<string, []>("op_2476_cast_fp16")];
+            tensor<string, []> var_2478_equation_0 = const()[name = tensor<string, []>("op_2478_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2418_cast_fp16_5, var_2460_cast_fp16))[name = tensor<string, []>("op_2478_cast_fp16")];
+            tensor<string, []> var_2480_equation_0 = const()[name = tensor<string, []>("op_2480_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2418_cast_fp16_6, var_2461_cast_fp16))[name = tensor<string, []>("op_2480_cast_fp16")];
+            tensor<string, []> var_2482_equation_0 = const()[name = tensor<string, []>("op_2482_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2418_cast_fp16_7, var_2462_cast_fp16))[name = tensor<string, []>("op_2482_cast_fp16")];
+            tensor<string, []> var_2484_equation_0 = const()[name = tensor<string, []>("op_2484_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2484_cast_fp16 = einsum(equation = var_2484_equation_0, values = (var_2418_cast_fp16_8, var_2463_cast_fp16))[name = tensor<string, []>("op_2484_cast_fp16")];
+            tensor<string, []> var_2486_equation_0 = const()[name = tensor<string, []>("op_2486_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2418_cast_fp16_9, var_2464_cast_fp16))[name = tensor<string, []>("op_2486_cast_fp16")];
+            tensor<string, []> var_2488_equation_0 = const()[name = tensor<string, []>("op_2488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2488_cast_fp16 = einsum(equation = var_2488_equation_0, values = (var_2418_cast_fp16_10, var_2465_cast_fp16))[name = tensor<string, []>("op_2488_cast_fp16")];
+            tensor<string, []> var_2490_equation_0 = const()[name = tensor<string, []>("op_2490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2418_cast_fp16_11, var_2466_cast_fp16))[name = tensor<string, []>("op_2490_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_115_cast_fp16 = concat(axis = var_2339, interleave = input_115_interleave_0, values = (var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16, var_2480_cast_fp16, var_2482_cast_fp16, var_2484_cast_fp16, var_2486_cast_fp16, var_2488_cast_fp16, var_2490_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_2499_pad_type_0 = const()[name = tensor<string, []>("op_2499_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2499_strides_0 = const()[name = tensor<string, []>("op_2499_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2499_pad_0 = const()[name = tensor<string, []>("op_2499_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2499_dilations_0 = const()[name = tensor<string, []>("op_2499_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2499_groups_0 = const()[name = tensor<string, []>("op_2499_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165687424)))];
+            tensor<fp16, [768]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166867136)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2499_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_2499_dilations_0, groups = var_2499_groups_0, pad = var_2499_pad_0, pad_type = var_2499_pad_type_0, strides = var_2499_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_2499_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_2499_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166868736)))];
+            tensor<fp16, [768]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166870336)))];
+            tensor<fp16, []> var_2509_to_fp16 = const()[name = tensor<string, []>("op_2509_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_2509_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166871936)))];
+            tensor<fp16, [3072]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171590592)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_2535_pad_type_0 = const()[name = tensor<string, []>("op_2535_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2535_strides_0 = const()[name = tensor<string, []>("op_2535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2535_pad_0 = const()[name = tensor<string, []>("op_2535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2535_dilations_0 = const()[name = tensor<string, []>("op_2535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2535_groups_0 = const()[name = tensor<string, []>("op_2535_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171596800)))];
+            tensor<fp16, [768]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176315456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2535_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_2535_dilations_0, groups = var_2535_groups_0, pad = var_2535_pad_0, pad_type = var_2535_pad_type_0, strides = var_2535_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_2535_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_2535_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176317056)))];
+            tensor<fp16, [768]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176318656)))];
+            tensor<fp16, []> var_2549_to_fp16 = const()[name = tensor<string, []>("op_2549_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_2549_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_2560_axes_0 = const()[name = tensor<string, []>("op_2560_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1500]> var_2560_cast_fp16 = squeeze(axes = var_2560_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_2560_cast_fp16")];
+            tensor<int32, [3]> var_2563_perm_0 = const()[name = tensor<string, []>("op_2563_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_2563_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_2563_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 768]> var_2563_cast_fp16 = transpose(perm = var_2563_perm_0, x = var_2560_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 768]> output = cast(dtype = var_2563_cast_fp16_to_fp32_dtype_0, x = var_2563_cast_fp16)[name = tensor<string, []>("cast_51")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/small.en/ggml-small.en-encoder.mlmodelc/weights/weight.bin b/small.en/ggml-small.en-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f1d3a28e11bdb37e8dcd5967ca78c5e0b5b2a3e1
--- /dev/null
+++ b/small.en/ggml-small.en-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57d5901ae064655c83da5a7281e643d56974fd875e986e6bd7997038e65e428a
+size 176320256
diff --git a/small.en/ggml-small.en.bin b/small.en/ggml-small.en.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eaeeb6d63378cf6515ff2c1cb4e33486ae6bcc2f
--- /dev/null
+++ b/small.en/ggml-small.en.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6138d6d58ecc8322097e0f987c32f1be8bb0a18532a3f88f734d1bbf9c41e5d
+size 487614201
diff --git a/small/.DS_Store b/small/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..43ac60a654a7916477d24dfceb59df85052f2077
Binary files /dev/null and b/small/.DS_Store differ
diff --git a/small/ggml-small-encoder.mlmodelc/analytics/coremldata.bin b/small/ggml-small-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b15b26f7f6a3c8a8718cc69c7c4ff81a9990881e
--- /dev/null
+++ b/small/ggml-small-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea4d5727e7b1c8762c548f4b80a30447740200e986075b056cf3da16359da56
+size 243
diff --git a/small/ggml-small-encoder.mlmodelc/coremldata.bin b/small/ggml-small-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2aecfa7c477de9fb63dc789d720d5e2fb0e72d4a
--- /dev/null
+++ b/small/ggml-small-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a9554f8fae587d40c808e5a6411d79ce20ab6b473d7a3a88df2cd8123978fb4
+size 320
diff --git a/small/ggml-small-encoder.mlmodelc/metadata.json b/small/ggml-small-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..51befae09ac573051582aa395e8d1449589dbc6b
--- /dev/null
+++ b/small/ggml-small-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 768]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 12,
+      "Gelu" : 14,
+      "LayerNorm" : 25,
+      "Transpose" : 13,
+      "Softmax" : 144,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 25,
+      "Einsum" : 288,
+      "ExpandDims" : 1,
+      "Split" : 36,
+      "Conv" : 74
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_small",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/small/ggml-small-encoder.mlmodelc/model.mil b/small/ggml-small-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..0c32f029586d9600dccb30b0eeece3bf02323585
--- /dev/null
+++ b/small/ggml-small-encoder.mlmodelc/model.mil
@@ -0,0 +1,1663 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_44_pad_type_0 = const()[name = tensor<string, []>("op_44_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_44_pad_0 = const()[name = tensor<string, []>("op_44_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_44_strides_0 = const()[name = tensor<string, []>("op_44_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_44_dilations_0 = const()[name = tensor<string, []>("op_44_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_44_groups_0 = const()[name = tensor<string, []>("op_44_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [768, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [768, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [768]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(368768)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_52")];
+            tensor<fp16, [1, 768, 3000]> var_44_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_44_dilations_0, groups = var_44_groups_0, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_44_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_44_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_62_pad_type_0 = const()[name = tensor<string, []>("op_62_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_62_pad_0 = const()[name = tensor<string, []>("op_62_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_62_strides_0 = const()[name = tensor<string, []>("op_62_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_62_dilations_0 = const()[name = tensor<string, []>("op_62_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_62_groups_0 = const()[name = tensor<string, []>("op_62_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [768, 768, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(370368)))];
+            tensor<fp16, [768]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3909376)))];
+            tensor<fp16, [1, 768, 1500]> var_62_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_62_dilations_0, groups = var_62_groups_0, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_62_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 768, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_62_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [768, 1500]> var_67_to_fp16 = const()[name = tensor<string, []>("op_67_to_fp16"), val = tensor<fp16, [768, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3910976)))];
+            tensor<fp16, [1, 768, 1500]> var_69_cast_fp16 = add(x = x_3_cast_fp16, y = var_67_to_fp16)[name = tensor<string, []>("op_69_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_69_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_84 = const()[name = tensor<string, []>("op_84"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6215040)))];
+            tensor<fp16, [768]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6216640)))];
+            tensor<fp16, []> var_100_to_fp16 = const()[name = tensor<string, []>("op_100_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_100_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_135_weight_0_to_fp16 = const()[name = tensor<string, []>("op_135_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6218240)))];
+            tensor<fp16, [768]> var_135_bias_0_to_fp16 = const()[name = tensor<string, []>("op_135_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7397952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_135_cast_fp16 = conv(bias = var_135_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_135_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_135_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7399552)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_133_pad_type_0 = const()[name = tensor<string, []>("op_133_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_133_strides_0 = const()[name = tensor<string, []>("op_133_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_133_pad_0 = const()[name = tensor<string, []>("op_133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_133_dilations_0 = const()[name = tensor<string, []>("op_133_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_133_groups_0 = const()[name = tensor<string, []>("op_133_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8579264)))];
+            tensor<fp16, [768]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9758976)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_133_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_133_dilations_0, groups = var_133_groups_0, pad = var_133_pad_0, pad_type = var_133_pad_type_0, strides = var_133_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_133_cast_fp16")];
+            tensor<int32, [12]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_136_axis_0 = const()[name = tensor<string, []>("op_136_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_136_cast_fp16_11 = split(axis = var_136_axis_0, split_sizes = tile_0, x = var_135_cast_fp16)[name = tensor<string, []>("op_136_cast_fp16")];
+            tensor<int32, [4]> var_149_perm_0 = const()[name = tensor<string, []>("op_149_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_150_axis_0 = const()[name = tensor<string, []>("op_150_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_149_cast_fp16 = transpose(perm = var_149_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_12")];
+            tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_150_cast_fp16_11 = split(axis = var_150_axis_0, split_sizes = tile_1, x = var_149_cast_fp16)[name = tensor<string, []>("op_150_cast_fp16")];
+            tensor<int32, [12]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_163_axis_0 = const()[name = tensor<string, []>("op_163_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16_11 = split(axis = var_163_axis_0, split_sizes = tile_2, x = var_133_cast_fp16)[name = tensor<string, []>("op_163_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_150_cast_fp16_0, var_136_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_150_cast_fp16_1, var_136_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_150_cast_fp16_2, var_136_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_150_cast_fp16_3, var_136_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_150_cast_fp16_4, var_136_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_150_cast_fp16_5, var_136_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_150_cast_fp16_6, var_136_cast_fp16_6))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_150_cast_fp16_7, var_136_cast_fp16_7))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_150_cast_fp16_8, var_136_cast_fp16_8))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_150_cast_fp16_9, var_136_cast_fp16_9))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_150_cast_fp16_10, var_136_cast_fp16_10))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_150_cast_fp16_11, var_136_cast_fp16_11))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_200_cast_fp16 = softmax(axis = var_84, x = aw_1_cast_fp16)[name = tensor<string, []>("op_200_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_201_cast_fp16 = softmax(axis = var_84, x = aw_3_cast_fp16)[name = tensor<string, []>("op_201_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_202_cast_fp16 = softmax(axis = var_84, x = aw_5_cast_fp16)[name = tensor<string, []>("op_202_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_203_cast_fp16 = softmax(axis = var_84, x = aw_7_cast_fp16)[name = tensor<string, []>("op_203_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_204_cast_fp16 = softmax(axis = var_84, x = aw_9_cast_fp16)[name = tensor<string, []>("op_204_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_205_cast_fp16 = softmax(axis = var_84, x = aw_11_cast_fp16)[name = tensor<string, []>("op_205_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_206_cast_fp16 = softmax(axis = var_84, x = aw_13_cast_fp16)[name = tensor<string, []>("op_206_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_207_cast_fp16 = softmax(axis = var_84, x = aw_15_cast_fp16)[name = tensor<string, []>("op_207_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_208_cast_fp16 = softmax(axis = var_84, x = aw_17_cast_fp16)[name = tensor<string, []>("op_208_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_209_cast_fp16 = softmax(axis = var_84, x = aw_19_cast_fp16)[name = tensor<string, []>("op_209_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_210_cast_fp16 = softmax(axis = var_84, x = aw_21_cast_fp16)[name = tensor<string, []>("op_210_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_211_cast_fp16 = softmax(axis = var_84, x = aw_23_cast_fp16)[name = tensor<string, []>("op_211_cast_fp16")];
+            tensor<string, []> var_213_equation_0 = const()[name = tensor<string, []>("op_213_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_213_cast_fp16 = einsum(equation = var_213_equation_0, values = (var_163_cast_fp16_0, var_200_cast_fp16))[name = tensor<string, []>("op_213_cast_fp16")];
+            tensor<string, []> var_215_equation_0 = const()[name = tensor<string, []>("op_215_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_215_cast_fp16 = einsum(equation = var_215_equation_0, values = (var_163_cast_fp16_1, var_201_cast_fp16))[name = tensor<string, []>("op_215_cast_fp16")];
+            tensor<string, []> var_217_equation_0 = const()[name = tensor<string, []>("op_217_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_217_cast_fp16 = einsum(equation = var_217_equation_0, values = (var_163_cast_fp16_2, var_202_cast_fp16))[name = tensor<string, []>("op_217_cast_fp16")];
+            tensor<string, []> var_219_equation_0 = const()[name = tensor<string, []>("op_219_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_219_cast_fp16 = einsum(equation = var_219_equation_0, values = (var_163_cast_fp16_3, var_203_cast_fp16))[name = tensor<string, []>("op_219_cast_fp16")];
+            tensor<string, []> var_221_equation_0 = const()[name = tensor<string, []>("op_221_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_221_cast_fp16 = einsum(equation = var_221_equation_0, values = (var_163_cast_fp16_4, var_204_cast_fp16))[name = tensor<string, []>("op_221_cast_fp16")];
+            tensor<string, []> var_223_equation_0 = const()[name = tensor<string, []>("op_223_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_223_cast_fp16 = einsum(equation = var_223_equation_0, values = (var_163_cast_fp16_5, var_205_cast_fp16))[name = tensor<string, []>("op_223_cast_fp16")];
+            tensor<string, []> var_225_equation_0 = const()[name = tensor<string, []>("op_225_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_225_cast_fp16 = einsum(equation = var_225_equation_0, values = (var_163_cast_fp16_6, var_206_cast_fp16))[name = tensor<string, []>("op_225_cast_fp16")];
+            tensor<string, []> var_227_equation_0 = const()[name = tensor<string, []>("op_227_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_227_cast_fp16 = einsum(equation = var_227_equation_0, values = (var_163_cast_fp16_7, var_207_cast_fp16))[name = tensor<string, []>("op_227_cast_fp16")];
+            tensor<string, []> var_229_equation_0 = const()[name = tensor<string, []>("op_229_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_229_cast_fp16 = einsum(equation = var_229_equation_0, values = (var_163_cast_fp16_8, var_208_cast_fp16))[name = tensor<string, []>("op_229_cast_fp16")];
+            tensor<string, []> var_231_equation_0 = const()[name = tensor<string, []>("op_231_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_231_cast_fp16 = einsum(equation = var_231_equation_0, values = (var_163_cast_fp16_9, var_209_cast_fp16))[name = tensor<string, []>("op_231_cast_fp16")];
+            tensor<string, []> var_233_equation_0 = const()[name = tensor<string, []>("op_233_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_233_cast_fp16 = einsum(equation = var_233_equation_0, values = (var_163_cast_fp16_10, var_210_cast_fp16))[name = tensor<string, []>("op_233_cast_fp16")];
+            tensor<string, []> var_235_equation_0 = const()[name = tensor<string, []>("op_235_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_235_cast_fp16 = einsum(equation = var_235_equation_0, values = (var_163_cast_fp16_11, var_211_cast_fp16))[name = tensor<string, []>("op_235_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_5_cast_fp16 = concat(axis = var_84, interleave = input_5_interleave_0, values = (var_213_cast_fp16, var_215_cast_fp16, var_217_cast_fp16, var_219_cast_fp16, var_221_cast_fp16, var_223_cast_fp16, var_225_cast_fp16, var_227_cast_fp16, var_229_cast_fp16, var_231_cast_fp16, var_233_cast_fp16, var_235_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_244_pad_type_0 = const()[name = tensor<string, []>("op_244_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_244_strides_0 = const()[name = tensor<string, []>("op_244_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_244_pad_0 = const()[name = tensor<string, []>("op_244_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_244_dilations_0 = const()[name = tensor<string, []>("op_244_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_244_groups_0 = const()[name = tensor<string, []>("op_244_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9760576)))];
+            tensor<fp16, [768]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10940288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_244_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_244_dilations_0, groups = var_244_groups_0, pad = var_244_pad_0, pad_type = var_244_pad_type_0, strides = var_244_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_244_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10941888)))];
+            tensor<fp16, [768]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10943488)))];
+            tensor<fp16, []> var_254_to_fp16 = const()[name = tensor<string, []>("op_254_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_254_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10945088)))];
+            tensor<fp16, [3072]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15663744)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_280_pad_type_0 = const()[name = tensor<string, []>("op_280_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_280_strides_0 = const()[name = tensor<string, []>("op_280_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_280_pad_0 = const()[name = tensor<string, []>("op_280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_280_dilations_0 = const()[name = tensor<string, []>("op_280_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_280_groups_0 = const()[name = tensor<string, []>("op_280_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15669952)))];
+            tensor<fp16, [768]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20388608)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_280_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_280_dilations_0, groups = var_280_groups_0, pad = var_280_pad_0, pad_type = var_280_pad_type_0, strides = var_280_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_280_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_280_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_289 = const()[name = tensor<string, []>("op_289"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20390208)))];
+            tensor<fp16, [768]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20391808)))];
+            tensor<fp16, []> var_305_to_fp16 = const()[name = tensor<string, []>("op_305_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_305_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_340_weight_0_to_fp16 = const()[name = tensor<string, []>("op_340_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20393408)))];
+            tensor<fp16, [768]> var_340_bias_0_to_fp16 = const()[name = tensor<string, []>("op_340_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21573120)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_340_cast_fp16 = conv(bias = var_340_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_340_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_340_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21574720)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_338_pad_type_0 = const()[name = tensor<string, []>("op_338_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_338_strides_0 = const()[name = tensor<string, []>("op_338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_338_pad_0 = const()[name = tensor<string, []>("op_338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_338_dilations_0 = const()[name = tensor<string, []>("op_338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_338_groups_0 = const()[name = tensor<string, []>("op_338_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(22754432)))];
+            tensor<fp16, [768]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23934144)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_338_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_338_cast_fp16")];
+            tensor<int32, [12]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_341_axis_0 = const()[name = tensor<string, []>("op_341_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_341_cast_fp16_11 = split(axis = var_341_axis_0, split_sizes = tile_3, x = var_340_cast_fp16)[name = tensor<string, []>("op_341_cast_fp16")];
+            tensor<int32, [4]> var_354_perm_0 = const()[name = tensor<string, []>("op_354_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_355_axis_0 = const()[name = tensor<string, []>("op_355_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_354_cast_fp16 = transpose(perm = var_354_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_355_cast_fp16_11 = split(axis = var_355_axis_0, split_sizes = tile_4, x = var_354_cast_fp16)[name = tensor<string, []>("op_355_cast_fp16")];
+            tensor<int32, [12]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_368_axis_0 = const()[name = tensor<string, []>("op_368_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_368_cast_fp16_11 = split(axis = var_368_axis_0, split_sizes = tile_5, x = var_338_cast_fp16)[name = tensor<string, []>("op_368_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_355_cast_fp16_0, var_341_cast_fp16_0))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_355_cast_fp16_1, var_341_cast_fp16_1))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_355_cast_fp16_2, var_341_cast_fp16_2))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_355_cast_fp16_3, var_341_cast_fp16_3))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_355_cast_fp16_4, var_341_cast_fp16_4))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_355_cast_fp16_5, var_341_cast_fp16_5))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_355_cast_fp16_6, var_341_cast_fp16_6))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_355_cast_fp16_7, var_341_cast_fp16_7))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_355_cast_fp16_8, var_341_cast_fp16_8))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_355_cast_fp16_9, var_341_cast_fp16_9))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_355_cast_fp16_10, var_341_cast_fp16_10))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_47_equation_0 = const()[name = tensor<string, []>("aw_47_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_47_cast_fp16 = einsum(equation = aw_47_equation_0, values = (var_355_cast_fp16_11, var_341_cast_fp16_11))[name = tensor<string, []>("aw_47_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_405_cast_fp16 = softmax(axis = var_289, x = aw_25_cast_fp16)[name = tensor<string, []>("op_405_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_406_cast_fp16 = softmax(axis = var_289, x = aw_27_cast_fp16)[name = tensor<string, []>("op_406_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_407_cast_fp16 = softmax(axis = var_289, x = aw_29_cast_fp16)[name = tensor<string, []>("op_407_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_408_cast_fp16 = softmax(axis = var_289, x = aw_31_cast_fp16)[name = tensor<string, []>("op_408_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_409_cast_fp16 = softmax(axis = var_289, x = aw_33_cast_fp16)[name = tensor<string, []>("op_409_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_410_cast_fp16 = softmax(axis = var_289, x = aw_35_cast_fp16)[name = tensor<string, []>("op_410_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_411_cast_fp16 = softmax(axis = var_289, x = aw_37_cast_fp16)[name = tensor<string, []>("op_411_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_412_cast_fp16 = softmax(axis = var_289, x = aw_39_cast_fp16)[name = tensor<string, []>("op_412_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_413_cast_fp16 = softmax(axis = var_289, x = aw_41_cast_fp16)[name = tensor<string, []>("op_413_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_414_cast_fp16 = softmax(axis = var_289, x = aw_43_cast_fp16)[name = tensor<string, []>("op_414_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_415_cast_fp16 = softmax(axis = var_289, x = aw_45_cast_fp16)[name = tensor<string, []>("op_415_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_416_cast_fp16 = softmax(axis = var_289, x = aw_47_cast_fp16)[name = tensor<string, []>("op_416_cast_fp16")];
+            tensor<string, []> var_418_equation_0 = const()[name = tensor<string, []>("op_418_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_418_cast_fp16 = einsum(equation = var_418_equation_0, values = (var_368_cast_fp16_0, var_405_cast_fp16))[name = tensor<string, []>("op_418_cast_fp16")];
+            tensor<string, []> var_420_equation_0 = const()[name = tensor<string, []>("op_420_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_420_cast_fp16 = einsum(equation = var_420_equation_0, values = (var_368_cast_fp16_1, var_406_cast_fp16))[name = tensor<string, []>("op_420_cast_fp16")];
+            tensor<string, []> var_422_equation_0 = const()[name = tensor<string, []>("op_422_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_422_cast_fp16 = einsum(equation = var_422_equation_0, values = (var_368_cast_fp16_2, var_407_cast_fp16))[name = tensor<string, []>("op_422_cast_fp16")];
+            tensor<string, []> var_424_equation_0 = const()[name = tensor<string, []>("op_424_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_424_cast_fp16 = einsum(equation = var_424_equation_0, values = (var_368_cast_fp16_3, var_408_cast_fp16))[name = tensor<string, []>("op_424_cast_fp16")];
+            tensor<string, []> var_426_equation_0 = const()[name = tensor<string, []>("op_426_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_426_cast_fp16 = einsum(equation = var_426_equation_0, values = (var_368_cast_fp16_4, var_409_cast_fp16))[name = tensor<string, []>("op_426_cast_fp16")];
+            tensor<string, []> var_428_equation_0 = const()[name = tensor<string, []>("op_428_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_428_cast_fp16 = einsum(equation = var_428_equation_0, values = (var_368_cast_fp16_5, var_410_cast_fp16))[name = tensor<string, []>("op_428_cast_fp16")];
+            tensor<string, []> var_430_equation_0 = const()[name = tensor<string, []>("op_430_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_430_cast_fp16 = einsum(equation = var_430_equation_0, values = (var_368_cast_fp16_6, var_411_cast_fp16))[name = tensor<string, []>("op_430_cast_fp16")];
+            tensor<string, []> var_432_equation_0 = const()[name = tensor<string, []>("op_432_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_432_cast_fp16 = einsum(equation = var_432_equation_0, values = (var_368_cast_fp16_7, var_412_cast_fp16))[name = tensor<string, []>("op_432_cast_fp16")];
+            tensor<string, []> var_434_equation_0 = const()[name = tensor<string, []>("op_434_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16 = einsum(equation = var_434_equation_0, values = (var_368_cast_fp16_8, var_413_cast_fp16))[name = tensor<string, []>("op_434_cast_fp16")];
+            tensor<string, []> var_436_equation_0 = const()[name = tensor<string, []>("op_436_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_436_cast_fp16 = einsum(equation = var_436_equation_0, values = (var_368_cast_fp16_9, var_414_cast_fp16))[name = tensor<string, []>("op_436_cast_fp16")];
+            tensor<string, []> var_438_equation_0 = const()[name = tensor<string, []>("op_438_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_438_cast_fp16 = einsum(equation = var_438_equation_0, values = (var_368_cast_fp16_10, var_415_cast_fp16))[name = tensor<string, []>("op_438_cast_fp16")];
+            tensor<string, []> var_440_equation_0 = const()[name = tensor<string, []>("op_440_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_440_cast_fp16 = einsum(equation = var_440_equation_0, values = (var_368_cast_fp16_11, var_416_cast_fp16))[name = tensor<string, []>("op_440_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_15_cast_fp16 = concat(axis = var_289, interleave = input_15_interleave_0, values = (var_418_cast_fp16, var_420_cast_fp16, var_422_cast_fp16, var_424_cast_fp16, var_426_cast_fp16, var_428_cast_fp16, var_430_cast_fp16, var_432_cast_fp16, var_434_cast_fp16, var_436_cast_fp16, var_438_cast_fp16, var_440_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_449_pad_type_0 = const()[name = tensor<string, []>("op_449_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_449_strides_0 = const()[name = tensor<string, []>("op_449_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_449_pad_0 = const()[name = tensor<string, []>("op_449_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_449_dilations_0 = const()[name = tensor<string, []>("op_449_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_449_groups_0 = const()[name = tensor<string, []>("op_449_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23935744)))];
+            tensor<fp16, [768]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25115456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_449_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_449_dilations_0, groups = var_449_groups_0, pad = var_449_pad_0, pad_type = var_449_pad_type_0, strides = var_449_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_449_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_449_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25117056)))];
+            tensor<fp16, [768]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25118656)))];
+            tensor<fp16, []> var_459_to_fp16 = const()[name = tensor<string, []>("op_459_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_459_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25120256)))];
+            tensor<fp16, [3072]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29838912)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_485_pad_type_0 = const()[name = tensor<string, []>("op_485_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_485_strides_0 = const()[name = tensor<string, []>("op_485_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_485_pad_0 = const()[name = tensor<string, []>("op_485_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_485_dilations_0 = const()[name = tensor<string, []>("op_485_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_485_groups_0 = const()[name = tensor<string, []>("op_485_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(29845120)))];
+            tensor<fp16, [768]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34563776)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_485_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_485_dilations_0, groups = var_485_groups_0, pad = var_485_pad_0, pad_type = var_485_pad_type_0, strides = var_485_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_485_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_494 = const()[name = tensor<string, []>("op_494"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34565376)))];
+            tensor<fp16, [768]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34566976)))];
+            tensor<fp16, []> var_510_to_fp16 = const()[name = tensor<string, []>("op_510_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_510_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_545_weight_0_to_fp16 = const()[name = tensor<string, []>("op_545_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(34568576)))];
+            tensor<fp16, [768]> var_545_bias_0_to_fp16 = const()[name = tensor<string, []>("op_545_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35748288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_545_cast_fp16 = conv(bias = var_545_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_545_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35749888)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_543_pad_type_0 = const()[name = tensor<string, []>("op_543_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_543_strides_0 = const()[name = tensor<string, []>("op_543_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_543_pad_0 = const()[name = tensor<string, []>("op_543_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_543_dilations_0 = const()[name = tensor<string, []>("op_543_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_543_groups_0 = const()[name = tensor<string, []>("op_543_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(36929600)))];
+            tensor<fp16, [768]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38109312)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_543_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_543_dilations_0, groups = var_543_groups_0, pad = var_543_pad_0, pad_type = var_543_pad_type_0, strides = var_543_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_543_cast_fp16")];
+            tensor<int32, [12]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_546_axis_0 = const()[name = tensor<string, []>("op_546_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_546_cast_fp16_11 = split(axis = var_546_axis_0, split_sizes = tile_6, x = var_545_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
+            tensor<int32, [4]> var_559_perm_0 = const()[name = tensor<string, []>("op_559_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_560_axis_0 = const()[name = tensor<string, []>("op_560_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_559_cast_fp16 = transpose(perm = var_559_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_560_cast_fp16_11 = split(axis = var_560_axis_0, split_sizes = tile_7, x = var_559_cast_fp16)[name = tensor<string, []>("op_560_cast_fp16")];
+            tensor<int32, [12]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_573_axis_0 = const()[name = tensor<string, []>("op_573_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_573_cast_fp16_11 = split(axis = var_573_axis_0, split_sizes = tile_8, x = var_543_cast_fp16)[name = tensor<string, []>("op_573_cast_fp16")];
+            tensor<string, []> aw_49_equation_0 = const()[name = tensor<string, []>("aw_49_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_49_cast_fp16 = einsum(equation = aw_49_equation_0, values = (var_560_cast_fp16_0, var_546_cast_fp16_0))[name = tensor<string, []>("aw_49_cast_fp16")];
+            tensor<string, []> aw_51_equation_0 = const()[name = tensor<string, []>("aw_51_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_51_cast_fp16 = einsum(equation = aw_51_equation_0, values = (var_560_cast_fp16_1, var_546_cast_fp16_1))[name = tensor<string, []>("aw_51_cast_fp16")];
+            tensor<string, []> aw_53_equation_0 = const()[name = tensor<string, []>("aw_53_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_53_cast_fp16 = einsum(equation = aw_53_equation_0, values = (var_560_cast_fp16_2, var_546_cast_fp16_2))[name = tensor<string, []>("aw_53_cast_fp16")];
+            tensor<string, []> aw_55_equation_0 = const()[name = tensor<string, []>("aw_55_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_55_cast_fp16 = einsum(equation = aw_55_equation_0, values = (var_560_cast_fp16_3, var_546_cast_fp16_3))[name = tensor<string, []>("aw_55_cast_fp16")];
+            tensor<string, []> aw_57_equation_0 = const()[name = tensor<string, []>("aw_57_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_57_cast_fp16 = einsum(equation = aw_57_equation_0, values = (var_560_cast_fp16_4, var_546_cast_fp16_4))[name = tensor<string, []>("aw_57_cast_fp16")];
+            tensor<string, []> aw_59_equation_0 = const()[name = tensor<string, []>("aw_59_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_59_cast_fp16 = einsum(equation = aw_59_equation_0, values = (var_560_cast_fp16_5, var_546_cast_fp16_5))[name = tensor<string, []>("aw_59_cast_fp16")];
+            tensor<string, []> aw_61_equation_0 = const()[name = tensor<string, []>("aw_61_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_61_cast_fp16 = einsum(equation = aw_61_equation_0, values = (var_560_cast_fp16_6, var_546_cast_fp16_6))[name = tensor<string, []>("aw_61_cast_fp16")];
+            tensor<string, []> aw_63_equation_0 = const()[name = tensor<string, []>("aw_63_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_63_cast_fp16 = einsum(equation = aw_63_equation_0, values = (var_560_cast_fp16_7, var_546_cast_fp16_7))[name = tensor<string, []>("aw_63_cast_fp16")];
+            tensor<string, []> aw_65_equation_0 = const()[name = tensor<string, []>("aw_65_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_65_cast_fp16 = einsum(equation = aw_65_equation_0, values = (var_560_cast_fp16_8, var_546_cast_fp16_8))[name = tensor<string, []>("aw_65_cast_fp16")];
+            tensor<string, []> aw_67_equation_0 = const()[name = tensor<string, []>("aw_67_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_67_cast_fp16 = einsum(equation = aw_67_equation_0, values = (var_560_cast_fp16_9, var_546_cast_fp16_9))[name = tensor<string, []>("aw_67_cast_fp16")];
+            tensor<string, []> aw_69_equation_0 = const()[name = tensor<string, []>("aw_69_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_69_cast_fp16 = einsum(equation = aw_69_equation_0, values = (var_560_cast_fp16_10, var_546_cast_fp16_10))[name = tensor<string, []>("aw_69_cast_fp16")];
+            tensor<string, []> aw_71_equation_0 = const()[name = tensor<string, []>("aw_71_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_71_cast_fp16 = einsum(equation = aw_71_equation_0, values = (var_560_cast_fp16_11, var_546_cast_fp16_11))[name = tensor<string, []>("aw_71_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_610_cast_fp16 = softmax(axis = var_494, x = aw_49_cast_fp16)[name = tensor<string, []>("op_610_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_611_cast_fp16 = softmax(axis = var_494, x = aw_51_cast_fp16)[name = tensor<string, []>("op_611_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_612_cast_fp16 = softmax(axis = var_494, x = aw_53_cast_fp16)[name = tensor<string, []>("op_612_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_613_cast_fp16 = softmax(axis = var_494, x = aw_55_cast_fp16)[name = tensor<string, []>("op_613_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_614_cast_fp16 = softmax(axis = var_494, x = aw_57_cast_fp16)[name = tensor<string, []>("op_614_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_615_cast_fp16 = softmax(axis = var_494, x = aw_59_cast_fp16)[name = tensor<string, []>("op_615_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_616_cast_fp16 = softmax(axis = var_494, x = aw_61_cast_fp16)[name = tensor<string, []>("op_616_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_617_cast_fp16 = softmax(axis = var_494, x = aw_63_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_618_cast_fp16 = softmax(axis = var_494, x = aw_65_cast_fp16)[name = tensor<string, []>("op_618_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_619_cast_fp16 = softmax(axis = var_494, x = aw_67_cast_fp16)[name = tensor<string, []>("op_619_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_620_cast_fp16 = softmax(axis = var_494, x = aw_69_cast_fp16)[name = tensor<string, []>("op_620_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_621_cast_fp16 = softmax(axis = var_494, x = aw_71_cast_fp16)[name = tensor<string, []>("op_621_cast_fp16")];
+            tensor<string, []> var_623_equation_0 = const()[name = tensor<string, []>("op_623_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_573_cast_fp16_0, var_610_cast_fp16))[name = tensor<string, []>("op_623_cast_fp16")];
+            tensor<string, []> var_625_equation_0 = const()[name = tensor<string, []>("op_625_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_573_cast_fp16_1, var_611_cast_fp16))[name = tensor<string, []>("op_625_cast_fp16")];
+            tensor<string, []> var_627_equation_0 = const()[name = tensor<string, []>("op_627_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_573_cast_fp16_2, var_612_cast_fp16))[name = tensor<string, []>("op_627_cast_fp16")];
+            tensor<string, []> var_629_equation_0 = const()[name = tensor<string, []>("op_629_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_573_cast_fp16_3, var_613_cast_fp16))[name = tensor<string, []>("op_629_cast_fp16")];
+            tensor<string, []> var_631_equation_0 = const()[name = tensor<string, []>("op_631_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_573_cast_fp16_4, var_614_cast_fp16))[name = tensor<string, []>("op_631_cast_fp16")];
+            tensor<string, []> var_633_equation_0 = const()[name = tensor<string, []>("op_633_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_573_cast_fp16_5, var_615_cast_fp16))[name = tensor<string, []>("op_633_cast_fp16")];
+            tensor<string, []> var_635_equation_0 = const()[name = tensor<string, []>("op_635_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_573_cast_fp16_6, var_616_cast_fp16))[name = tensor<string, []>("op_635_cast_fp16")];
+            tensor<string, []> var_637_equation_0 = const()[name = tensor<string, []>("op_637_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_573_cast_fp16_7, var_617_cast_fp16))[name = tensor<string, []>("op_637_cast_fp16")];
+            tensor<string, []> var_639_equation_0 = const()[name = tensor<string, []>("op_639_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_573_cast_fp16_8, var_618_cast_fp16))[name = tensor<string, []>("op_639_cast_fp16")];
+            tensor<string, []> var_641_equation_0 = const()[name = tensor<string, []>("op_641_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_573_cast_fp16_9, var_619_cast_fp16))[name = tensor<string, []>("op_641_cast_fp16")];
+            tensor<string, []> var_643_equation_0 = const()[name = tensor<string, []>("op_643_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_573_cast_fp16_10, var_620_cast_fp16))[name = tensor<string, []>("op_643_cast_fp16")];
+            tensor<string, []> var_645_equation_0 = const()[name = tensor<string, []>("op_645_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_573_cast_fp16_11, var_621_cast_fp16))[name = tensor<string, []>("op_645_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_25_cast_fp16 = concat(axis = var_494, interleave = input_25_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_654_pad_type_0 = const()[name = tensor<string, []>("op_654_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_654_strides_0 = const()[name = tensor<string, []>("op_654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_654_pad_0 = const()[name = tensor<string, []>("op_654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_654_dilations_0 = const()[name = tensor<string, []>("op_654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_654_groups_0 = const()[name = tensor<string, []>("op_654_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38110912)))];
+            tensor<fp16, [768]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39290624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_654_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_654_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_654_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39292224)))];
+            tensor<fp16, [768]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39293824)))];
+            tensor<fp16, []> var_664_to_fp16 = const()[name = tensor<string, []>("op_664_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_664_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39295424)))];
+            tensor<fp16, [3072]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44014080)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_690_pad_type_0 = const()[name = tensor<string, []>("op_690_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_690_strides_0 = const()[name = tensor<string, []>("op_690_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_690_pad_0 = const()[name = tensor<string, []>("op_690_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_690_dilations_0 = const()[name = tensor<string, []>("op_690_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_690_groups_0 = const()[name = tensor<string, []>("op_690_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(44020288)))];
+            tensor<fp16, [768]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48738944)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_690_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_690_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_690_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_699 = const()[name = tensor<string, []>("op_699"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48740544)))];
+            tensor<fp16, [768]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48742144)))];
+            tensor<fp16, []> var_715_to_fp16 = const()[name = tensor<string, []>("op_715_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_715_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_7_pad_type_0 = const()[name = tensor<string, []>("q_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_7_strides_0 = const()[name = tensor<string, []>("q_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_7_pad_0 = const()[name = tensor<string, []>("q_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_7_dilations_0 = const()[name = tensor<string, []>("q_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_7_groups_0 = const()[name = tensor<string, []>("q_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_750_weight_0_to_fp16 = const()[name = tensor<string, []>("op_750_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48743744)))];
+            tensor<fp16, [768]> var_750_bias_0_to_fp16 = const()[name = tensor<string, []>("op_750_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49923456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_750_cast_fp16 = conv(bias = var_750_bias_0_to_fp16, dilations = q_7_dilations_0, groups = q_7_groups_0, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = q_7_strides_0, weight = var_750_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_750_cast_fp16")];
+            tensor<string, []> k_7_pad_type_0 = const()[name = tensor<string, []>("k_7_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_7_strides_0 = const()[name = tensor<string, []>("k_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_7_pad_0 = const()[name = tensor<string, []>("k_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_7_dilations_0 = const()[name = tensor<string, []>("k_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_7_groups_0 = const()[name = tensor<string, []>("k_7_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(49925056)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_7_cast_fp16 = conv(dilations = k_7_dilations_0, groups = k_7_groups_0, pad = k_7_pad_0, pad_type = k_7_pad_type_0, strides = k_7_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")];
+            tensor<string, []> var_748_pad_type_0 = const()[name = tensor<string, []>("op_748_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_748_strides_0 = const()[name = tensor<string, []>("op_748_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_748_pad_0 = const()[name = tensor<string, []>("op_748_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_748_dilations_0 = const()[name = tensor<string, []>("op_748_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_748_groups_0 = const()[name = tensor<string, []>("op_748_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(51104768)))];
+            tensor<fp16, [768]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52284480)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_748_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_748_dilations_0, groups = var_748_groups_0, pad = var_748_pad_0, pad_type = var_748_pad_type_0, strides = var_748_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_748_cast_fp16")];
+            tensor<int32, [12]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_751_axis_0 = const()[name = tensor<string, []>("op_751_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_751_cast_fp16_11 = split(axis = var_751_axis_0, split_sizes = tile_9, x = var_750_cast_fp16)[name = tensor<string, []>("op_751_cast_fp16")];
+            tensor<int32, [4]> var_764_perm_0 = const()[name = tensor<string, []>("op_764_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_765_axis_0 = const()[name = tensor<string, []>("op_765_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_764_cast_fp16 = transpose(perm = var_764_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_765_cast_fp16_11 = split(axis = var_765_axis_0, split_sizes = tile_10, x = var_764_cast_fp16)[name = tensor<string, []>("op_765_cast_fp16")];
+            tensor<int32, [12]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_778_axis_0 = const()[name = tensor<string, []>("op_778_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_778_cast_fp16_11 = split(axis = var_778_axis_0, split_sizes = tile_11, x = var_748_cast_fp16)[name = tensor<string, []>("op_778_cast_fp16")];
+            tensor<string, []> aw_73_equation_0 = const()[name = tensor<string, []>("aw_73_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_73_cast_fp16 = einsum(equation = aw_73_equation_0, values = (var_765_cast_fp16_0, var_751_cast_fp16_0))[name = tensor<string, []>("aw_73_cast_fp16")];
+            tensor<string, []> aw_75_equation_0 = const()[name = tensor<string, []>("aw_75_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_75_cast_fp16 = einsum(equation = aw_75_equation_0, values = (var_765_cast_fp16_1, var_751_cast_fp16_1))[name = tensor<string, []>("aw_75_cast_fp16")];
+            tensor<string, []> aw_77_equation_0 = const()[name = tensor<string, []>("aw_77_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_77_cast_fp16 = einsum(equation = aw_77_equation_0, values = (var_765_cast_fp16_2, var_751_cast_fp16_2))[name = tensor<string, []>("aw_77_cast_fp16")];
+            tensor<string, []> aw_79_equation_0 = const()[name = tensor<string, []>("aw_79_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_79_cast_fp16 = einsum(equation = aw_79_equation_0, values = (var_765_cast_fp16_3, var_751_cast_fp16_3))[name = tensor<string, []>("aw_79_cast_fp16")];
+            tensor<string, []> aw_81_equation_0 = const()[name = tensor<string, []>("aw_81_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_81_cast_fp16 = einsum(equation = aw_81_equation_0, values = (var_765_cast_fp16_4, var_751_cast_fp16_4))[name = tensor<string, []>("aw_81_cast_fp16")];
+            tensor<string, []> aw_83_equation_0 = const()[name = tensor<string, []>("aw_83_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_83_cast_fp16 = einsum(equation = aw_83_equation_0, values = (var_765_cast_fp16_5, var_751_cast_fp16_5))[name = tensor<string, []>("aw_83_cast_fp16")];
+            tensor<string, []> aw_85_equation_0 = const()[name = tensor<string, []>("aw_85_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_85_cast_fp16 = einsum(equation = aw_85_equation_0, values = (var_765_cast_fp16_6, var_751_cast_fp16_6))[name = tensor<string, []>("aw_85_cast_fp16")];
+            tensor<string, []> aw_87_equation_0 = const()[name = tensor<string, []>("aw_87_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_87_cast_fp16 = einsum(equation = aw_87_equation_0, values = (var_765_cast_fp16_7, var_751_cast_fp16_7))[name = tensor<string, []>("aw_87_cast_fp16")];
+            tensor<string, []> aw_89_equation_0 = const()[name = tensor<string, []>("aw_89_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_89_cast_fp16 = einsum(equation = aw_89_equation_0, values = (var_765_cast_fp16_8, var_751_cast_fp16_8))[name = tensor<string, []>("aw_89_cast_fp16")];
+            tensor<string, []> aw_91_equation_0 = const()[name = tensor<string, []>("aw_91_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_91_cast_fp16 = einsum(equation = aw_91_equation_0, values = (var_765_cast_fp16_9, var_751_cast_fp16_9))[name = tensor<string, []>("aw_91_cast_fp16")];
+            tensor<string, []> aw_93_equation_0 = const()[name = tensor<string, []>("aw_93_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_93_cast_fp16 = einsum(equation = aw_93_equation_0, values = (var_765_cast_fp16_10, var_751_cast_fp16_10))[name = tensor<string, []>("aw_93_cast_fp16")];
+            tensor<string, []> aw_95_equation_0 = const()[name = tensor<string, []>("aw_95_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_95_cast_fp16 = einsum(equation = aw_95_equation_0, values = (var_765_cast_fp16_11, var_751_cast_fp16_11))[name = tensor<string, []>("aw_95_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_815_cast_fp16 = softmax(axis = var_699, x = aw_73_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_816_cast_fp16 = softmax(axis = var_699, x = aw_75_cast_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_817_cast_fp16 = softmax(axis = var_699, x = aw_77_cast_fp16)[name = tensor<string, []>("op_817_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_818_cast_fp16 = softmax(axis = var_699, x = aw_79_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_819_cast_fp16 = softmax(axis = var_699, x = aw_81_cast_fp16)[name = tensor<string, []>("op_819_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_820_cast_fp16 = softmax(axis = var_699, x = aw_83_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_821_cast_fp16 = softmax(axis = var_699, x = aw_85_cast_fp16)[name = tensor<string, []>("op_821_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_822_cast_fp16 = softmax(axis = var_699, x = aw_87_cast_fp16)[name = tensor<string, []>("op_822_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_823_cast_fp16 = softmax(axis = var_699, x = aw_89_cast_fp16)[name = tensor<string, []>("op_823_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_824_cast_fp16 = softmax(axis = var_699, x = aw_91_cast_fp16)[name = tensor<string, []>("op_824_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_825_cast_fp16 = softmax(axis = var_699, x = aw_93_cast_fp16)[name = tensor<string, []>("op_825_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_826_cast_fp16 = softmax(axis = var_699, x = aw_95_cast_fp16)[name = tensor<string, []>("op_826_cast_fp16")];
+            tensor<string, []> var_828_equation_0 = const()[name = tensor<string, []>("op_828_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_778_cast_fp16_0, var_815_cast_fp16))[name = tensor<string, []>("op_828_cast_fp16")];
+            tensor<string, []> var_830_equation_0 = const()[name = tensor<string, []>("op_830_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_778_cast_fp16_1, var_816_cast_fp16))[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<string, []> var_832_equation_0 = const()[name = tensor<string, []>("op_832_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_778_cast_fp16_2, var_817_cast_fp16))[name = tensor<string, []>("op_832_cast_fp16")];
+            tensor<string, []> var_834_equation_0 = const()[name = tensor<string, []>("op_834_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_778_cast_fp16_3, var_818_cast_fp16))[name = tensor<string, []>("op_834_cast_fp16")];
+            tensor<string, []> var_836_equation_0 = const()[name = tensor<string, []>("op_836_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_778_cast_fp16_4, var_819_cast_fp16))[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<string, []> var_838_equation_0 = const()[name = tensor<string, []>("op_838_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_778_cast_fp16_5, var_820_cast_fp16))[name = tensor<string, []>("op_838_cast_fp16")];
+            tensor<string, []> var_840_equation_0 = const()[name = tensor<string, []>("op_840_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_778_cast_fp16_6, var_821_cast_fp16))[name = tensor<string, []>("op_840_cast_fp16")];
+            tensor<string, []> var_842_equation_0 = const()[name = tensor<string, []>("op_842_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_778_cast_fp16_7, var_822_cast_fp16))[name = tensor<string, []>("op_842_cast_fp16")];
+            tensor<string, []> var_844_equation_0 = const()[name = tensor<string, []>("op_844_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_778_cast_fp16_8, var_823_cast_fp16))[name = tensor<string, []>("op_844_cast_fp16")];
+            tensor<string, []> var_846_equation_0 = const()[name = tensor<string, []>("op_846_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_778_cast_fp16_9, var_824_cast_fp16))[name = tensor<string, []>("op_846_cast_fp16")];
+            tensor<string, []> var_848_equation_0 = const()[name = tensor<string, []>("op_848_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_778_cast_fp16_10, var_825_cast_fp16))[name = tensor<string, []>("op_848_cast_fp16")];
+            tensor<string, []> var_850_equation_0 = const()[name = tensor<string, []>("op_850_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_850_cast_fp16 = einsum(equation = var_850_equation_0, values = (var_778_cast_fp16_11, var_826_cast_fp16))[name = tensor<string, []>("op_850_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_35_cast_fp16 = concat(axis = var_699, interleave = input_35_interleave_0, values = (var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16, var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_859_pad_type_0 = const()[name = tensor<string, []>("op_859_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_859_strides_0 = const()[name = tensor<string, []>("op_859_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_859_pad_0 = const()[name = tensor<string, []>("op_859_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_859_dilations_0 = const()[name = tensor<string, []>("op_859_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_859_groups_0 = const()[name = tensor<string, []>("op_859_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52286080)))];
+            tensor<fp16, [768]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53465792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_859_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_859_dilations_0, groups = var_859_groups_0, pad = var_859_pad_0, pad_type = var_859_pad_type_0, strides = var_859_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_859_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_859_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53467392)))];
+            tensor<fp16, [768]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53468992)))];
+            tensor<fp16, []> var_869_to_fp16 = const()[name = tensor<string, []>("op_869_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_869_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53470592)))];
+            tensor<fp16, [3072]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58189248)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_41_mode_0 = const()[name = tensor<string, []>("input_41_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<string, []> var_895_pad_type_0 = const()[name = tensor<string, []>("op_895_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_895_strides_0 = const()[name = tensor<string, []>("op_895_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_895_pad_0 = const()[name = tensor<string, []>("op_895_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_895_dilations_0 = const()[name = tensor<string, []>("op_895_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_895_groups_0 = const()[name = tensor<string, []>("op_895_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58195456)))];
+            tensor<fp16, [768]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62914112)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_895_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_895_dilations_0, groups = var_895_groups_0, pad = var_895_pad_0, pad_type = var_895_pad_type_0, strides = var_895_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("op_895_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_895_cast_fp16)[name = tensor<string, []>("inputs_17_cast_fp16")];
+            tensor<int32, []> var_904 = const()[name = tensor<string, []>("op_904"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_43_axes_0 = const()[name = tensor<string, []>("input_43_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_43_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62915712)))];
+            tensor<fp16, [768]> input_43_beta_0_to_fp16 = const()[name = tensor<string, []>("input_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62917312)))];
+            tensor<fp16, []> var_920_to_fp16 = const()[name = tensor<string, []>("op_920_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_43_cast_fp16 = layer_norm(axes = input_43_axes_0, beta = input_43_beta_0_to_fp16, epsilon = var_920_to_fp16, gamma = input_43_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<string, []> q_9_pad_type_0 = const()[name = tensor<string, []>("q_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_9_strides_0 = const()[name = tensor<string, []>("q_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_9_pad_0 = const()[name = tensor<string, []>("q_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_9_dilations_0 = const()[name = tensor<string, []>("q_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_9_groups_0 = const()[name = tensor<string, []>("q_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_955_weight_0_to_fp16 = const()[name = tensor<string, []>("op_955_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62918912)))];
+            tensor<fp16, [768]> var_955_bias_0_to_fp16 = const()[name = tensor<string, []>("op_955_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64098624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_955_cast_fp16 = conv(bias = var_955_bias_0_to_fp16, dilations = q_9_dilations_0, groups = q_9_groups_0, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = q_9_strides_0, weight = var_955_weight_0_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_955_cast_fp16")];
+            tensor<string, []> k_9_pad_type_0 = const()[name = tensor<string, []>("k_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_9_strides_0 = const()[name = tensor<string, []>("k_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_9_pad_0 = const()[name = tensor<string, []>("k_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_9_dilations_0 = const()[name = tensor<string, []>("k_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_9_groups_0 = const()[name = tensor<string, []>("k_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64100224)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_9_cast_fp16 = conv(dilations = k_9_dilations_0, groups = k_9_groups_0, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = k_9_strides_0, weight = blocks_4_attn_key_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")];
+            tensor<string, []> var_953_pad_type_0 = const()[name = tensor<string, []>("op_953_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_953_strides_0 = const()[name = tensor<string, []>("op_953_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_953_pad_0 = const()[name = tensor<string, []>("op_953_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_953_dilations_0 = const()[name = tensor<string, []>("op_953_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_953_groups_0 = const()[name = tensor<string, []>("op_953_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65279936)))];
+            tensor<fp16, [768]> blocks_4_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66459648)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_953_cast_fp16 = conv(bias = blocks_4_attn_value_bias_to_fp16, dilations = var_953_dilations_0, groups = var_953_groups_0, pad = var_953_pad_0, pad_type = var_953_pad_type_0, strides = var_953_strides_0, weight = blocks_4_attn_value_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("op_953_cast_fp16")];
+            tensor<int32, [12]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_956_axis_0 = const()[name = tensor<string, []>("op_956_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_956_cast_fp16_11 = split(axis = var_956_axis_0, split_sizes = tile_12, x = var_955_cast_fp16)[name = tensor<string, []>("op_956_cast_fp16")];
+            tensor<int32, [4]> var_969_perm_0 = const()[name = tensor<string, []>("op_969_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_970_axis_0 = const()[name = tensor<string, []>("op_970_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_969_cast_fp16 = transpose(perm = var_969_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_970_cast_fp16_11 = split(axis = var_970_axis_0, split_sizes = tile_13, x = var_969_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<int32, [12]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_983_axis_0 = const()[name = tensor<string, []>("op_983_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_983_cast_fp16_11 = split(axis = var_983_axis_0, split_sizes = tile_14, x = var_953_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
+            tensor<string, []> aw_97_equation_0 = const()[name = tensor<string, []>("aw_97_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_97_cast_fp16 = einsum(equation = aw_97_equation_0, values = (var_970_cast_fp16_0, var_956_cast_fp16_0))[name = tensor<string, []>("aw_97_cast_fp16")];
+            tensor<string, []> aw_99_equation_0 = const()[name = tensor<string, []>("aw_99_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_99_cast_fp16 = einsum(equation = aw_99_equation_0, values = (var_970_cast_fp16_1, var_956_cast_fp16_1))[name = tensor<string, []>("aw_99_cast_fp16")];
+            tensor<string, []> aw_101_equation_0 = const()[name = tensor<string, []>("aw_101_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_101_cast_fp16 = einsum(equation = aw_101_equation_0, values = (var_970_cast_fp16_2, var_956_cast_fp16_2))[name = tensor<string, []>("aw_101_cast_fp16")];
+            tensor<string, []> aw_103_equation_0 = const()[name = tensor<string, []>("aw_103_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_103_cast_fp16 = einsum(equation = aw_103_equation_0, values = (var_970_cast_fp16_3, var_956_cast_fp16_3))[name = tensor<string, []>("aw_103_cast_fp16")];
+            tensor<string, []> aw_105_equation_0 = const()[name = tensor<string, []>("aw_105_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_105_cast_fp16 = einsum(equation = aw_105_equation_0, values = (var_970_cast_fp16_4, var_956_cast_fp16_4))[name = tensor<string, []>("aw_105_cast_fp16")];
+            tensor<string, []> aw_107_equation_0 = const()[name = tensor<string, []>("aw_107_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_107_cast_fp16 = einsum(equation = aw_107_equation_0, values = (var_970_cast_fp16_5, var_956_cast_fp16_5))[name = tensor<string, []>("aw_107_cast_fp16")];
+            tensor<string, []> aw_109_equation_0 = const()[name = tensor<string, []>("aw_109_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_109_cast_fp16 = einsum(equation = aw_109_equation_0, values = (var_970_cast_fp16_6, var_956_cast_fp16_6))[name = tensor<string, []>("aw_109_cast_fp16")];
+            tensor<string, []> aw_111_equation_0 = const()[name = tensor<string, []>("aw_111_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_111_cast_fp16 = einsum(equation = aw_111_equation_0, values = (var_970_cast_fp16_7, var_956_cast_fp16_7))[name = tensor<string, []>("aw_111_cast_fp16")];
+            tensor<string, []> aw_113_equation_0 = const()[name = tensor<string, []>("aw_113_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_113_cast_fp16 = einsum(equation = aw_113_equation_0, values = (var_970_cast_fp16_8, var_956_cast_fp16_8))[name = tensor<string, []>("aw_113_cast_fp16")];
+            tensor<string, []> aw_115_equation_0 = const()[name = tensor<string, []>("aw_115_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_115_cast_fp16 = einsum(equation = aw_115_equation_0, values = (var_970_cast_fp16_9, var_956_cast_fp16_9))[name = tensor<string, []>("aw_115_cast_fp16")];
+            tensor<string, []> aw_117_equation_0 = const()[name = tensor<string, []>("aw_117_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_117_cast_fp16 = einsum(equation = aw_117_equation_0, values = (var_970_cast_fp16_10, var_956_cast_fp16_10))[name = tensor<string, []>("aw_117_cast_fp16")];
+            tensor<string, []> aw_119_equation_0 = const()[name = tensor<string, []>("aw_119_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_119_cast_fp16 = einsum(equation = aw_119_equation_0, values = (var_970_cast_fp16_11, var_956_cast_fp16_11))[name = tensor<string, []>("aw_119_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1020_cast_fp16 = softmax(axis = var_904, x = aw_97_cast_fp16)[name = tensor<string, []>("op_1020_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1021_cast_fp16 = softmax(axis = var_904, x = aw_99_cast_fp16)[name = tensor<string, []>("op_1021_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1022_cast_fp16 = softmax(axis = var_904, x = aw_101_cast_fp16)[name = tensor<string, []>("op_1022_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1023_cast_fp16 = softmax(axis = var_904, x = aw_103_cast_fp16)[name = tensor<string, []>("op_1023_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1024_cast_fp16 = softmax(axis = var_904, x = aw_105_cast_fp16)[name = tensor<string, []>("op_1024_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1025_cast_fp16 = softmax(axis = var_904, x = aw_107_cast_fp16)[name = tensor<string, []>("op_1025_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1026_cast_fp16 = softmax(axis = var_904, x = aw_109_cast_fp16)[name = tensor<string, []>("op_1026_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1027_cast_fp16 = softmax(axis = var_904, x = aw_111_cast_fp16)[name = tensor<string, []>("op_1027_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1028_cast_fp16 = softmax(axis = var_904, x = aw_113_cast_fp16)[name = tensor<string, []>("op_1028_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1029_cast_fp16 = softmax(axis = var_904, x = aw_115_cast_fp16)[name = tensor<string, []>("op_1029_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1030_cast_fp16 = softmax(axis = var_904, x = aw_117_cast_fp16)[name = tensor<string, []>("op_1030_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1031_cast_fp16 = softmax(axis = var_904, x = aw_119_cast_fp16)[name = tensor<string, []>("op_1031_cast_fp16")];
+            tensor<string, []> var_1033_equation_0 = const()[name = tensor<string, []>("op_1033_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1033_cast_fp16 = einsum(equation = var_1033_equation_0, values = (var_983_cast_fp16_0, var_1020_cast_fp16))[name = tensor<string, []>("op_1033_cast_fp16")];
+            tensor<string, []> var_1035_equation_0 = const()[name = tensor<string, []>("op_1035_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1035_cast_fp16 = einsum(equation = var_1035_equation_0, values = (var_983_cast_fp16_1, var_1021_cast_fp16))[name = tensor<string, []>("op_1035_cast_fp16")];
+            tensor<string, []> var_1037_equation_0 = const()[name = tensor<string, []>("op_1037_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1037_cast_fp16 = einsum(equation = var_1037_equation_0, values = (var_983_cast_fp16_2, var_1022_cast_fp16))[name = tensor<string, []>("op_1037_cast_fp16")];
+            tensor<string, []> var_1039_equation_0 = const()[name = tensor<string, []>("op_1039_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1039_cast_fp16 = einsum(equation = var_1039_equation_0, values = (var_983_cast_fp16_3, var_1023_cast_fp16))[name = tensor<string, []>("op_1039_cast_fp16")];
+            tensor<string, []> var_1041_equation_0 = const()[name = tensor<string, []>("op_1041_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1041_cast_fp16 = einsum(equation = var_1041_equation_0, values = (var_983_cast_fp16_4, var_1024_cast_fp16))[name = tensor<string, []>("op_1041_cast_fp16")];
+            tensor<string, []> var_1043_equation_0 = const()[name = tensor<string, []>("op_1043_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1043_cast_fp16 = einsum(equation = var_1043_equation_0, values = (var_983_cast_fp16_5, var_1025_cast_fp16))[name = tensor<string, []>("op_1043_cast_fp16")];
+            tensor<string, []> var_1045_equation_0 = const()[name = tensor<string, []>("op_1045_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1045_cast_fp16 = einsum(equation = var_1045_equation_0, values = (var_983_cast_fp16_6, var_1026_cast_fp16))[name = tensor<string, []>("op_1045_cast_fp16")];
+            tensor<string, []> var_1047_equation_0 = const()[name = tensor<string, []>("op_1047_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1047_cast_fp16 = einsum(equation = var_1047_equation_0, values = (var_983_cast_fp16_7, var_1027_cast_fp16))[name = tensor<string, []>("op_1047_cast_fp16")];
+            tensor<string, []> var_1049_equation_0 = const()[name = tensor<string, []>("op_1049_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1049_cast_fp16 = einsum(equation = var_1049_equation_0, values = (var_983_cast_fp16_8, var_1028_cast_fp16))[name = tensor<string, []>("op_1049_cast_fp16")];
+            tensor<string, []> var_1051_equation_0 = const()[name = tensor<string, []>("op_1051_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1051_cast_fp16 = einsum(equation = var_1051_equation_0, values = (var_983_cast_fp16_9, var_1029_cast_fp16))[name = tensor<string, []>("op_1051_cast_fp16")];
+            tensor<string, []> var_1053_equation_0 = const()[name = tensor<string, []>("op_1053_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1053_cast_fp16 = einsum(equation = var_1053_equation_0, values = (var_983_cast_fp16_10, var_1030_cast_fp16))[name = tensor<string, []>("op_1053_cast_fp16")];
+            tensor<string, []> var_1055_equation_0 = const()[name = tensor<string, []>("op_1055_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1055_cast_fp16 = einsum(equation = var_1055_equation_0, values = (var_983_cast_fp16_11, var_1031_cast_fp16))[name = tensor<string, []>("op_1055_cast_fp16")];
+            tensor<bool, []> input_45_interleave_0 = const()[name = tensor<string, []>("input_45_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_45_cast_fp16 = concat(axis = var_904, interleave = input_45_interleave_0, values = (var_1033_cast_fp16, var_1035_cast_fp16, var_1037_cast_fp16, var_1039_cast_fp16, var_1041_cast_fp16, var_1043_cast_fp16, var_1045_cast_fp16, var_1047_cast_fp16, var_1049_cast_fp16, var_1051_cast_fp16, var_1053_cast_fp16, var_1055_cast_fp16))[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<string, []> var_1064_pad_type_0 = const()[name = tensor<string, []>("op_1064_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1064_strides_0 = const()[name = tensor<string, []>("op_1064_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1064_pad_0 = const()[name = tensor<string, []>("op_1064_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1064_dilations_0 = const()[name = tensor<string, []>("op_1064_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1064_groups_0 = const()[name = tensor<string, []>("op_1064_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_4_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66461248)))];
+            tensor<fp16, [768]> blocks_4_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67640960)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1064_cast_fp16 = conv(bias = blocks_4_attn_out_bias_to_fp16, dilations = var_1064_dilations_0, groups = var_1064_groups_0, pad = var_1064_pad_0, pad_type = var_1064_pad_type_0, strides = var_1064_strides_0, weight = blocks_4_attn_out_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = var_1064_cast_fp16)[name = tensor<string, []>("inputs_19_cast_fp16")];
+            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_47_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_47_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67642560)))];
+            tensor<fp16, [768]> input_47_beta_0_to_fp16 = const()[name = tensor<string, []>("input_47_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67644160)))];
+            tensor<fp16, []> var_1074_to_fp16 = const()[name = tensor<string, []>("op_1074_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = input_47_beta_0_to_fp16, epsilon = var_1074_to_fp16, gamma = input_47_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<string, []> input_49_pad_type_0 = const()[name = tensor<string, []>("input_49_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_49_strides_0 = const()[name = tensor<string, []>("input_49_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_49_pad_0 = const()[name = tensor<string, []>("input_49_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_49_dilations_0 = const()[name = tensor<string, []>("input_49_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_49_groups_0 = const()[name = tensor<string, []>("input_49_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_4_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67645760)))];
+            tensor<fp16, [3072]> blocks_4_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72364416)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_49_cast_fp16 = conv(bias = blocks_4_mlp_0_bias_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = blocks_4_mlp_0_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<string, []> input_51_mode_0 = const()[name = tensor<string, []>("input_51_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<string, []> var_1100_pad_type_0 = const()[name = tensor<string, []>("op_1100_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1100_strides_0 = const()[name = tensor<string, []>("op_1100_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1100_pad_0 = const()[name = tensor<string, []>("op_1100_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1100_dilations_0 = const()[name = tensor<string, []>("op_1100_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1100_groups_0 = const()[name = tensor<string, []>("op_1100_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_4_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72370624)))];
+            tensor<fp16, [768]> blocks_4_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_4_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77089280)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1100_cast_fp16 = conv(bias = blocks_4_mlp_2_bias_to_fp16, dilations = var_1100_dilations_0, groups = var_1100_groups_0, pad = var_1100_pad_0, pad_type = var_1100_pad_type_0, strides = var_1100_strides_0, weight = blocks_4_mlp_2_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("op_1100_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = var_1100_cast_fp16)[name = tensor<string, []>("inputs_21_cast_fp16")];
+            tensor<int32, []> var_1109 = const()[name = tensor<string, []>("op_1109"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_53_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_53_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77090880)))];
+            tensor<fp16, [768]> input_53_beta_0_to_fp16 = const()[name = tensor<string, []>("input_53_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77092480)))];
+            tensor<fp16, []> var_1125_to_fp16 = const()[name = tensor<string, []>("op_1125_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, beta = input_53_beta_0_to_fp16, epsilon = var_1125_to_fp16, gamma = input_53_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<string, []> q_11_pad_type_0 = const()[name = tensor<string, []>("q_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_11_strides_0 = const()[name = tensor<string, []>("q_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_11_pad_0 = const()[name = tensor<string, []>("q_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_11_dilations_0 = const()[name = tensor<string, []>("q_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_11_groups_0 = const()[name = tensor<string, []>("q_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1160_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1160_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77094080)))];
+            tensor<fp16, [768]> var_1160_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1160_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78273792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1160_cast_fp16 = conv(bias = var_1160_bias_0_to_fp16, dilations = q_11_dilations_0, groups = q_11_groups_0, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = q_11_strides_0, weight = var_1160_weight_0_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1160_cast_fp16")];
+            tensor<string, []> k_11_pad_type_0 = const()[name = tensor<string, []>("k_11_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_11_strides_0 = const()[name = tensor<string, []>("k_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_11_pad_0 = const()[name = tensor<string, []>("k_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_11_dilations_0 = const()[name = tensor<string, []>("k_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_11_groups_0 = const()[name = tensor<string, []>("k_11_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78275392)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_11_cast_fp16 = conv(dilations = k_11_dilations_0, groups = k_11_groups_0, pad = k_11_pad_0, pad_type = k_11_pad_type_0, strides = k_11_strides_0, weight = blocks_5_attn_key_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")];
+            tensor<string, []> var_1158_pad_type_0 = const()[name = tensor<string, []>("op_1158_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1158_strides_0 = const()[name = tensor<string, []>("op_1158_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1158_pad_0 = const()[name = tensor<string, []>("op_1158_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1158_dilations_0 = const()[name = tensor<string, []>("op_1158_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1158_groups_0 = const()[name = tensor<string, []>("op_1158_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79455104)))];
+            tensor<fp16, [768]> blocks_5_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80634816)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1158_cast_fp16 = conv(bias = blocks_5_attn_value_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = blocks_5_attn_value_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("op_1158_cast_fp16")];
+            tensor<int32, [12]> tile_15 = const()[name = tensor<string, []>("tile_15"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1161_axis_0 = const()[name = tensor<string, []>("op_1161_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1161_cast_fp16_11 = split(axis = var_1161_axis_0, split_sizes = tile_15, x = var_1160_cast_fp16)[name = tensor<string, []>("op_1161_cast_fp16")];
+            tensor<int32, [4]> var_1174_perm_0 = const()[name = tensor<string, []>("op_1174_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_16 = const()[name = tensor<string, []>("tile_16"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1175_axis_0 = const()[name = tensor<string, []>("op_1175_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1174_cast_fp16 = transpose(perm = var_1174_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1175_cast_fp16_11 = split(axis = var_1175_axis_0, split_sizes = tile_16, x = var_1174_cast_fp16)[name = tensor<string, []>("op_1175_cast_fp16")];
+            tensor<int32, [12]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1188_axis_0 = const()[name = tensor<string, []>("op_1188_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1188_cast_fp16_11 = split(axis = var_1188_axis_0, split_sizes = tile_17, x = var_1158_cast_fp16)[name = tensor<string, []>("op_1188_cast_fp16")];
+            tensor<string, []> aw_121_equation_0 = const()[name = tensor<string, []>("aw_121_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_121_cast_fp16 = einsum(equation = aw_121_equation_0, values = (var_1175_cast_fp16_0, var_1161_cast_fp16_0))[name = tensor<string, []>("aw_121_cast_fp16")];
+            tensor<string, []> aw_123_equation_0 = const()[name = tensor<string, []>("aw_123_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_123_cast_fp16 = einsum(equation = aw_123_equation_0, values = (var_1175_cast_fp16_1, var_1161_cast_fp16_1))[name = tensor<string, []>("aw_123_cast_fp16")];
+            tensor<string, []> aw_125_equation_0 = const()[name = tensor<string, []>("aw_125_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_125_cast_fp16 = einsum(equation = aw_125_equation_0, values = (var_1175_cast_fp16_2, var_1161_cast_fp16_2))[name = tensor<string, []>("aw_125_cast_fp16")];
+            tensor<string, []> aw_127_equation_0 = const()[name = tensor<string, []>("aw_127_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_127_cast_fp16 = einsum(equation = aw_127_equation_0, values = (var_1175_cast_fp16_3, var_1161_cast_fp16_3))[name = tensor<string, []>("aw_127_cast_fp16")];
+            tensor<string, []> aw_129_equation_0 = const()[name = tensor<string, []>("aw_129_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_129_cast_fp16 = einsum(equation = aw_129_equation_0, values = (var_1175_cast_fp16_4, var_1161_cast_fp16_4))[name = tensor<string, []>("aw_129_cast_fp16")];
+            tensor<string, []> aw_131_equation_0 = const()[name = tensor<string, []>("aw_131_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_131_cast_fp16 = einsum(equation = aw_131_equation_0, values = (var_1175_cast_fp16_5, var_1161_cast_fp16_5))[name = tensor<string, []>("aw_131_cast_fp16")];
+            tensor<string, []> aw_133_equation_0 = const()[name = tensor<string, []>("aw_133_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_133_cast_fp16 = einsum(equation = aw_133_equation_0, values = (var_1175_cast_fp16_6, var_1161_cast_fp16_6))[name = tensor<string, []>("aw_133_cast_fp16")];
+            tensor<string, []> aw_135_equation_0 = const()[name = tensor<string, []>("aw_135_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_135_cast_fp16 = einsum(equation = aw_135_equation_0, values = (var_1175_cast_fp16_7, var_1161_cast_fp16_7))[name = tensor<string, []>("aw_135_cast_fp16")];
+            tensor<string, []> aw_137_equation_0 = const()[name = tensor<string, []>("aw_137_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_137_cast_fp16 = einsum(equation = aw_137_equation_0, values = (var_1175_cast_fp16_8, var_1161_cast_fp16_8))[name = tensor<string, []>("aw_137_cast_fp16")];
+            tensor<string, []> aw_139_equation_0 = const()[name = tensor<string, []>("aw_139_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_139_cast_fp16 = einsum(equation = aw_139_equation_0, values = (var_1175_cast_fp16_9, var_1161_cast_fp16_9))[name = tensor<string, []>("aw_139_cast_fp16")];
+            tensor<string, []> aw_141_equation_0 = const()[name = tensor<string, []>("aw_141_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_141_cast_fp16 = einsum(equation = aw_141_equation_0, values = (var_1175_cast_fp16_10, var_1161_cast_fp16_10))[name = tensor<string, []>("aw_141_cast_fp16")];
+            tensor<string, []> aw_143_equation_0 = const()[name = tensor<string, []>("aw_143_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_143_cast_fp16 = einsum(equation = aw_143_equation_0, values = (var_1175_cast_fp16_11, var_1161_cast_fp16_11))[name = tensor<string, []>("aw_143_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1225_cast_fp16 = softmax(axis = var_1109, x = aw_121_cast_fp16)[name = tensor<string, []>("op_1225_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1226_cast_fp16 = softmax(axis = var_1109, x = aw_123_cast_fp16)[name = tensor<string, []>("op_1226_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1227_cast_fp16 = softmax(axis = var_1109, x = aw_125_cast_fp16)[name = tensor<string, []>("op_1227_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1228_cast_fp16 = softmax(axis = var_1109, x = aw_127_cast_fp16)[name = tensor<string, []>("op_1228_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1229_cast_fp16 = softmax(axis = var_1109, x = aw_129_cast_fp16)[name = tensor<string, []>("op_1229_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1230_cast_fp16 = softmax(axis = var_1109, x = aw_131_cast_fp16)[name = tensor<string, []>("op_1230_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1231_cast_fp16 = softmax(axis = var_1109, x = aw_133_cast_fp16)[name = tensor<string, []>("op_1231_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1232_cast_fp16 = softmax(axis = var_1109, x = aw_135_cast_fp16)[name = tensor<string, []>("op_1232_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1233_cast_fp16 = softmax(axis = var_1109, x = aw_137_cast_fp16)[name = tensor<string, []>("op_1233_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1234_cast_fp16 = softmax(axis = var_1109, x = aw_139_cast_fp16)[name = tensor<string, []>("op_1234_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1235_cast_fp16 = softmax(axis = var_1109, x = aw_141_cast_fp16)[name = tensor<string, []>("op_1235_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1236_cast_fp16 = softmax(axis = var_1109, x = aw_143_cast_fp16)[name = tensor<string, []>("op_1236_cast_fp16")];
+            tensor<string, []> var_1238_equation_0 = const()[name = tensor<string, []>("op_1238_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1188_cast_fp16_0, var_1225_cast_fp16))[name = tensor<string, []>("op_1238_cast_fp16")];
+            tensor<string, []> var_1240_equation_0 = const()[name = tensor<string, []>("op_1240_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1188_cast_fp16_1, var_1226_cast_fp16))[name = tensor<string, []>("op_1240_cast_fp16")];
+            tensor<string, []> var_1242_equation_0 = const()[name = tensor<string, []>("op_1242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1188_cast_fp16_2, var_1227_cast_fp16))[name = tensor<string, []>("op_1242_cast_fp16")];
+            tensor<string, []> var_1244_equation_0 = const()[name = tensor<string, []>("op_1244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1188_cast_fp16_3, var_1228_cast_fp16))[name = tensor<string, []>("op_1244_cast_fp16")];
+            tensor<string, []> var_1246_equation_0 = const()[name = tensor<string, []>("op_1246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1188_cast_fp16_4, var_1229_cast_fp16))[name = tensor<string, []>("op_1246_cast_fp16")];
+            tensor<string, []> var_1248_equation_0 = const()[name = tensor<string, []>("op_1248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1188_cast_fp16_5, var_1230_cast_fp16))[name = tensor<string, []>("op_1248_cast_fp16")];
+            tensor<string, []> var_1250_equation_0 = const()[name = tensor<string, []>("op_1250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1188_cast_fp16_6, var_1231_cast_fp16))[name = tensor<string, []>("op_1250_cast_fp16")];
+            tensor<string, []> var_1252_equation_0 = const()[name = tensor<string, []>("op_1252_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1188_cast_fp16_7, var_1232_cast_fp16))[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<string, []> var_1254_equation_0 = const()[name = tensor<string, []>("op_1254_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1254_cast_fp16 = einsum(equation = var_1254_equation_0, values = (var_1188_cast_fp16_8, var_1233_cast_fp16))[name = tensor<string, []>("op_1254_cast_fp16")];
+            tensor<string, []> var_1256_equation_0 = const()[name = tensor<string, []>("op_1256_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1188_cast_fp16_9, var_1234_cast_fp16))[name = tensor<string, []>("op_1256_cast_fp16")];
+            tensor<string, []> var_1258_equation_0 = const()[name = tensor<string, []>("op_1258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_1188_cast_fp16_10, var_1235_cast_fp16))[name = tensor<string, []>("op_1258_cast_fp16")];
+            tensor<string, []> var_1260_equation_0 = const()[name = tensor<string, []>("op_1260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1188_cast_fp16_11, var_1236_cast_fp16))[name = tensor<string, []>("op_1260_cast_fp16")];
+            tensor<bool, []> input_55_interleave_0 = const()[name = tensor<string, []>("input_55_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_55_cast_fp16 = concat(axis = var_1109, interleave = input_55_interleave_0, values = (var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16, var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16, var_1252_cast_fp16, var_1254_cast_fp16, var_1256_cast_fp16, var_1258_cast_fp16, var_1260_cast_fp16))[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<string, []> var_1269_pad_type_0 = const()[name = tensor<string, []>("op_1269_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1269_strides_0 = const()[name = tensor<string, []>("op_1269_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1269_pad_0 = const()[name = tensor<string, []>("op_1269_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1269_dilations_0 = const()[name = tensor<string, []>("op_1269_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1269_groups_0 = const()[name = tensor<string, []>("op_1269_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_5_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80636416)))];
+            tensor<fp16, [768]> blocks_5_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81816128)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1269_cast_fp16 = conv(bias = blocks_5_attn_out_bias_to_fp16, dilations = var_1269_dilations_0, groups = var_1269_groups_0, pad = var_1269_pad_0, pad_type = var_1269_pad_type_0, strides = var_1269_strides_0, weight = blocks_5_attn_out_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("op_1269_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = var_1269_cast_fp16)[name = tensor<string, []>("inputs_23_cast_fp16")];
+            tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_57_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_57_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81817728)))];
+            tensor<fp16, [768]> input_57_beta_0_to_fp16 = const()[name = tensor<string, []>("input_57_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81819328)))];
+            tensor<fp16, []> var_1279_to_fp16 = const()[name = tensor<string, []>("op_1279_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = input_57_beta_0_to_fp16, epsilon = var_1279_to_fp16, gamma = input_57_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<string, []> input_59_pad_type_0 = const()[name = tensor<string, []>("input_59_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_59_strides_0 = const()[name = tensor<string, []>("input_59_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_59_pad_0 = const()[name = tensor<string, []>("input_59_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_59_dilations_0 = const()[name = tensor<string, []>("input_59_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_59_groups_0 = const()[name = tensor<string, []>("input_59_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_5_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81820928)))];
+            tensor<fp16, [3072]> blocks_5_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86539584)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_59_cast_fp16 = conv(bias = blocks_5_mlp_0_bias_to_fp16, dilations = input_59_dilations_0, groups = input_59_groups_0, pad = input_59_pad_0, pad_type = input_59_pad_type_0, strides = input_59_strides_0, weight = blocks_5_mlp_0_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<string, []> input_61_mode_0 = const()[name = tensor<string, []>("input_61_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_61_cast_fp16 = gelu(mode = input_61_mode_0, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<string, []> var_1305_pad_type_0 = const()[name = tensor<string, []>("op_1305_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1305_strides_0 = const()[name = tensor<string, []>("op_1305_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1305_pad_0 = const()[name = tensor<string, []>("op_1305_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1305_dilations_0 = const()[name = tensor<string, []>("op_1305_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1305_groups_0 = const()[name = tensor<string, []>("op_1305_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_5_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86545792)))];
+            tensor<fp16, [768]> blocks_5_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_5_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91264448)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1305_cast_fp16 = conv(bias = blocks_5_mlp_2_bias_to_fp16, dilations = var_1305_dilations_0, groups = var_1305_groups_0, pad = var_1305_pad_0, pad_type = var_1305_pad_type_0, strides = var_1305_strides_0, weight = blocks_5_mlp_2_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("op_1305_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = var_1305_cast_fp16)[name = tensor<string, []>("inputs_25_cast_fp16")];
+            tensor<int32, []> var_1314 = const()[name = tensor<string, []>("op_1314"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_63_axes_0 = const()[name = tensor<string, []>("input_63_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_63_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_63_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91266048)))];
+            tensor<fp16, [768]> input_63_beta_0_to_fp16 = const()[name = tensor<string, []>("input_63_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91267648)))];
+            tensor<fp16, []> var_1330_to_fp16 = const()[name = tensor<string, []>("op_1330_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_63_cast_fp16 = layer_norm(axes = input_63_axes_0, beta = input_63_beta_0_to_fp16, epsilon = var_1330_to_fp16, gamma = input_63_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<string, []> q_13_pad_type_0 = const()[name = tensor<string, []>("q_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_13_strides_0 = const()[name = tensor<string, []>("q_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_13_pad_0 = const()[name = tensor<string, []>("q_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_13_dilations_0 = const()[name = tensor<string, []>("q_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_13_groups_0 = const()[name = tensor<string, []>("q_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1365_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1365_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(91269248)))];
+            tensor<fp16, [768]> var_1365_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1365_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92448960)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1365_cast_fp16 = conv(bias = var_1365_bias_0_to_fp16, dilations = q_13_dilations_0, groups = q_13_groups_0, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = q_13_strides_0, weight = var_1365_weight_0_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1365_cast_fp16")];
+            tensor<string, []> k_13_pad_type_0 = const()[name = tensor<string, []>("k_13_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_13_strides_0 = const()[name = tensor<string, []>("k_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_13_pad_0 = const()[name = tensor<string, []>("k_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_13_dilations_0 = const()[name = tensor<string, []>("k_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_13_groups_0 = const()[name = tensor<string, []>("k_13_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(92450560)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_13_cast_fp16 = conv(dilations = k_13_dilations_0, groups = k_13_groups_0, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = k_13_strides_0, weight = blocks_6_attn_key_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")];
+            tensor<string, []> var_1363_pad_type_0 = const()[name = tensor<string, []>("op_1363_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1363_strides_0 = const()[name = tensor<string, []>("op_1363_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1363_pad_0 = const()[name = tensor<string, []>("op_1363_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1363_dilations_0 = const()[name = tensor<string, []>("op_1363_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1363_groups_0 = const()[name = tensor<string, []>("op_1363_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(93630272)))];
+            tensor<fp16, [768]> blocks_6_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94809984)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1363_cast_fp16 = conv(bias = blocks_6_attn_value_bias_to_fp16, dilations = var_1363_dilations_0, groups = var_1363_groups_0, pad = var_1363_pad_0, pad_type = var_1363_pad_type_0, strides = var_1363_strides_0, weight = blocks_6_attn_value_weight_to_fp16, x = input_63_cast_fp16)[name = tensor<string, []>("op_1363_cast_fp16")];
+            tensor<int32, [12]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1366_axis_0 = const()[name = tensor<string, []>("op_1366_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1366_cast_fp16_11 = split(axis = var_1366_axis_0, split_sizes = tile_18, x = var_1365_cast_fp16)[name = tensor<string, []>("op_1366_cast_fp16")];
+            tensor<int32, [4]> var_1379_perm_0 = const()[name = tensor<string, []>("op_1379_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1380_axis_0 = const()[name = tensor<string, []>("op_1380_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1379_cast_fp16 = transpose(perm = var_1379_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1380_cast_fp16_11 = split(axis = var_1380_axis_0, split_sizes = tile_19, x = var_1379_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
+            tensor<int32, [12]> tile_20 = const()[name = tensor<string, []>("tile_20"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1393_axis_0 = const()[name = tensor<string, []>("op_1393_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1393_cast_fp16_11 = split(axis = var_1393_axis_0, split_sizes = tile_20, x = var_1363_cast_fp16)[name = tensor<string, []>("op_1393_cast_fp16")];
+            tensor<string, []> aw_145_equation_0 = const()[name = tensor<string, []>("aw_145_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_145_cast_fp16 = einsum(equation = aw_145_equation_0, values = (var_1380_cast_fp16_0, var_1366_cast_fp16_0))[name = tensor<string, []>("aw_145_cast_fp16")];
+            tensor<string, []> aw_147_equation_0 = const()[name = tensor<string, []>("aw_147_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_147_cast_fp16 = einsum(equation = aw_147_equation_0, values = (var_1380_cast_fp16_1, var_1366_cast_fp16_1))[name = tensor<string, []>("aw_147_cast_fp16")];
+            tensor<string, []> aw_149_equation_0 = const()[name = tensor<string, []>("aw_149_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_149_cast_fp16 = einsum(equation = aw_149_equation_0, values = (var_1380_cast_fp16_2, var_1366_cast_fp16_2))[name = tensor<string, []>("aw_149_cast_fp16")];
+            tensor<string, []> aw_151_equation_0 = const()[name = tensor<string, []>("aw_151_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_151_cast_fp16 = einsum(equation = aw_151_equation_0, values = (var_1380_cast_fp16_3, var_1366_cast_fp16_3))[name = tensor<string, []>("aw_151_cast_fp16")];
+            tensor<string, []> aw_153_equation_0 = const()[name = tensor<string, []>("aw_153_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_153_cast_fp16 = einsum(equation = aw_153_equation_0, values = (var_1380_cast_fp16_4, var_1366_cast_fp16_4))[name = tensor<string, []>("aw_153_cast_fp16")];
+            tensor<string, []> aw_155_equation_0 = const()[name = tensor<string, []>("aw_155_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_155_cast_fp16 = einsum(equation = aw_155_equation_0, values = (var_1380_cast_fp16_5, var_1366_cast_fp16_5))[name = tensor<string, []>("aw_155_cast_fp16")];
+            tensor<string, []> aw_157_equation_0 = const()[name = tensor<string, []>("aw_157_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_157_cast_fp16 = einsum(equation = aw_157_equation_0, values = (var_1380_cast_fp16_6, var_1366_cast_fp16_6))[name = tensor<string, []>("aw_157_cast_fp16")];
+            tensor<string, []> aw_159_equation_0 = const()[name = tensor<string, []>("aw_159_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_159_cast_fp16 = einsum(equation = aw_159_equation_0, values = (var_1380_cast_fp16_7, var_1366_cast_fp16_7))[name = tensor<string, []>("aw_159_cast_fp16")];
+            tensor<string, []> aw_161_equation_0 = const()[name = tensor<string, []>("aw_161_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_161_cast_fp16 = einsum(equation = aw_161_equation_0, values = (var_1380_cast_fp16_8, var_1366_cast_fp16_8))[name = tensor<string, []>("aw_161_cast_fp16")];
+            tensor<string, []> aw_163_equation_0 = const()[name = tensor<string, []>("aw_163_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_163_cast_fp16 = einsum(equation = aw_163_equation_0, values = (var_1380_cast_fp16_9, var_1366_cast_fp16_9))[name = tensor<string, []>("aw_163_cast_fp16")];
+            tensor<string, []> aw_165_equation_0 = const()[name = tensor<string, []>("aw_165_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_165_cast_fp16 = einsum(equation = aw_165_equation_0, values = (var_1380_cast_fp16_10, var_1366_cast_fp16_10))[name = tensor<string, []>("aw_165_cast_fp16")];
+            tensor<string, []> aw_167_equation_0 = const()[name = tensor<string, []>("aw_167_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_167_cast_fp16 = einsum(equation = aw_167_equation_0, values = (var_1380_cast_fp16_11, var_1366_cast_fp16_11))[name = tensor<string, []>("aw_167_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1430_cast_fp16 = softmax(axis = var_1314, x = aw_145_cast_fp16)[name = tensor<string, []>("op_1430_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1431_cast_fp16 = softmax(axis = var_1314, x = aw_147_cast_fp16)[name = tensor<string, []>("op_1431_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1432_cast_fp16 = softmax(axis = var_1314, x = aw_149_cast_fp16)[name = tensor<string, []>("op_1432_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1433_cast_fp16 = softmax(axis = var_1314, x = aw_151_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1434_cast_fp16 = softmax(axis = var_1314, x = aw_153_cast_fp16)[name = tensor<string, []>("op_1434_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1435_cast_fp16 = softmax(axis = var_1314, x = aw_155_cast_fp16)[name = tensor<string, []>("op_1435_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1436_cast_fp16 = softmax(axis = var_1314, x = aw_157_cast_fp16)[name = tensor<string, []>("op_1436_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1437_cast_fp16 = softmax(axis = var_1314, x = aw_159_cast_fp16)[name = tensor<string, []>("op_1437_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1438_cast_fp16 = softmax(axis = var_1314, x = aw_161_cast_fp16)[name = tensor<string, []>("op_1438_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1439_cast_fp16 = softmax(axis = var_1314, x = aw_163_cast_fp16)[name = tensor<string, []>("op_1439_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1440_cast_fp16 = softmax(axis = var_1314, x = aw_165_cast_fp16)[name = tensor<string, []>("op_1440_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1441_cast_fp16 = softmax(axis = var_1314, x = aw_167_cast_fp16)[name = tensor<string, []>("op_1441_cast_fp16")];
+            tensor<string, []> var_1443_equation_0 = const()[name = tensor<string, []>("op_1443_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1443_cast_fp16 = einsum(equation = var_1443_equation_0, values = (var_1393_cast_fp16_0, var_1430_cast_fp16))[name = tensor<string, []>("op_1443_cast_fp16")];
+            tensor<string, []> var_1445_equation_0 = const()[name = tensor<string, []>("op_1445_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1445_cast_fp16 = einsum(equation = var_1445_equation_0, values = (var_1393_cast_fp16_1, var_1431_cast_fp16))[name = tensor<string, []>("op_1445_cast_fp16")];
+            tensor<string, []> var_1447_equation_0 = const()[name = tensor<string, []>("op_1447_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1447_cast_fp16 = einsum(equation = var_1447_equation_0, values = (var_1393_cast_fp16_2, var_1432_cast_fp16))[name = tensor<string, []>("op_1447_cast_fp16")];
+            tensor<string, []> var_1449_equation_0 = const()[name = tensor<string, []>("op_1449_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1449_cast_fp16 = einsum(equation = var_1449_equation_0, values = (var_1393_cast_fp16_3, var_1433_cast_fp16))[name = tensor<string, []>("op_1449_cast_fp16")];
+            tensor<string, []> var_1451_equation_0 = const()[name = tensor<string, []>("op_1451_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1451_cast_fp16 = einsum(equation = var_1451_equation_0, values = (var_1393_cast_fp16_4, var_1434_cast_fp16))[name = tensor<string, []>("op_1451_cast_fp16")];
+            tensor<string, []> var_1453_equation_0 = const()[name = tensor<string, []>("op_1453_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1453_cast_fp16 = einsum(equation = var_1453_equation_0, values = (var_1393_cast_fp16_5, var_1435_cast_fp16))[name = tensor<string, []>("op_1453_cast_fp16")];
+            tensor<string, []> var_1455_equation_0 = const()[name = tensor<string, []>("op_1455_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1455_cast_fp16 = einsum(equation = var_1455_equation_0, values = (var_1393_cast_fp16_6, var_1436_cast_fp16))[name = tensor<string, []>("op_1455_cast_fp16")];
+            tensor<string, []> var_1457_equation_0 = const()[name = tensor<string, []>("op_1457_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1457_cast_fp16 = einsum(equation = var_1457_equation_0, values = (var_1393_cast_fp16_7, var_1437_cast_fp16))[name = tensor<string, []>("op_1457_cast_fp16")];
+            tensor<string, []> var_1459_equation_0 = const()[name = tensor<string, []>("op_1459_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1459_cast_fp16 = einsum(equation = var_1459_equation_0, values = (var_1393_cast_fp16_8, var_1438_cast_fp16))[name = tensor<string, []>("op_1459_cast_fp16")];
+            tensor<string, []> var_1461_equation_0 = const()[name = tensor<string, []>("op_1461_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1461_cast_fp16 = einsum(equation = var_1461_equation_0, values = (var_1393_cast_fp16_9, var_1439_cast_fp16))[name = tensor<string, []>("op_1461_cast_fp16")];
+            tensor<string, []> var_1463_equation_0 = const()[name = tensor<string, []>("op_1463_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1463_cast_fp16 = einsum(equation = var_1463_equation_0, values = (var_1393_cast_fp16_10, var_1440_cast_fp16))[name = tensor<string, []>("op_1463_cast_fp16")];
+            tensor<string, []> var_1465_equation_0 = const()[name = tensor<string, []>("op_1465_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1465_cast_fp16 = einsum(equation = var_1465_equation_0, values = (var_1393_cast_fp16_11, var_1441_cast_fp16))[name = tensor<string, []>("op_1465_cast_fp16")];
+            tensor<bool, []> input_65_interleave_0 = const()[name = tensor<string, []>("input_65_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_65_cast_fp16 = concat(axis = var_1314, interleave = input_65_interleave_0, values = (var_1443_cast_fp16, var_1445_cast_fp16, var_1447_cast_fp16, var_1449_cast_fp16, var_1451_cast_fp16, var_1453_cast_fp16, var_1455_cast_fp16, var_1457_cast_fp16, var_1459_cast_fp16, var_1461_cast_fp16, var_1463_cast_fp16, var_1465_cast_fp16))[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<string, []> var_1474_pad_type_0 = const()[name = tensor<string, []>("op_1474_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1474_strides_0 = const()[name = tensor<string, []>("op_1474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1474_pad_0 = const()[name = tensor<string, []>("op_1474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1474_dilations_0 = const()[name = tensor<string, []>("op_1474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1474_groups_0 = const()[name = tensor<string, []>("op_1474_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_6_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(94811584)))];
+            tensor<fp16, [768]> blocks_6_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95991296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1474_cast_fp16 = conv(bias = blocks_6_attn_out_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = blocks_6_attn_out_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("op_1474_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = var_1474_cast_fp16)[name = tensor<string, []>("inputs_27_cast_fp16")];
+            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_67_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_67_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95992896)))];
+            tensor<fp16, [768]> input_67_beta_0_to_fp16 = const()[name = tensor<string, []>("input_67_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95994496)))];
+            tensor<fp16, []> var_1484_to_fp16 = const()[name = tensor<string, []>("op_1484_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, beta = input_67_beta_0_to_fp16, epsilon = var_1484_to_fp16, gamma = input_67_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<string, []> input_69_pad_type_0 = const()[name = tensor<string, []>("input_69_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_69_strides_0 = const()[name = tensor<string, []>("input_69_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_69_pad_0 = const()[name = tensor<string, []>("input_69_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_69_dilations_0 = const()[name = tensor<string, []>("input_69_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_69_groups_0 = const()[name = tensor<string, []>("input_69_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_6_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(95996096)))];
+            tensor<fp16, [3072]> blocks_6_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100714752)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_69_cast_fp16 = conv(bias = blocks_6_mlp_0_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = blocks_6_mlp_0_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<string, []> var_1510_pad_type_0 = const()[name = tensor<string, []>("op_1510_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1510_strides_0 = const()[name = tensor<string, []>("op_1510_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1510_pad_0 = const()[name = tensor<string, []>("op_1510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1510_dilations_0 = const()[name = tensor<string, []>("op_1510_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1510_groups_0 = const()[name = tensor<string, []>("op_1510_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_6_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(100720960)))];
+            tensor<fp16, [768]> blocks_6_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_6_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105439616)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1510_cast_fp16 = conv(bias = blocks_6_mlp_2_bias_to_fp16, dilations = var_1510_dilations_0, groups = var_1510_groups_0, pad = var_1510_pad_0, pad_type = var_1510_pad_type_0, strides = var_1510_strides_0, weight = blocks_6_mlp_2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("op_1510_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = var_1510_cast_fp16)[name = tensor<string, []>("inputs_29_cast_fp16")];
+            tensor<int32, []> var_1519 = const()[name = tensor<string, []>("op_1519"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = tensor<string, []>("input_73_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_73_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_73_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105441216)))];
+            tensor<fp16, [768]> input_73_beta_0_to_fp16 = const()[name = tensor<string, []>("input_73_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105442816)))];
+            tensor<fp16, []> var_1535_to_fp16 = const()[name = tensor<string, []>("op_1535_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = input_73_beta_0_to_fp16, epsilon = var_1535_to_fp16, gamma = input_73_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<string, []> q_15_pad_type_0 = const()[name = tensor<string, []>("q_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_15_strides_0 = const()[name = tensor<string, []>("q_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_15_pad_0 = const()[name = tensor<string, []>("q_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_15_dilations_0 = const()[name = tensor<string, []>("q_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_15_groups_0 = const()[name = tensor<string, []>("q_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1570_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1570_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(105444416)))];
+            tensor<fp16, [768]> var_1570_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1570_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106624128)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1570_cast_fp16 = conv(bias = var_1570_bias_0_to_fp16, dilations = q_15_dilations_0, groups = q_15_groups_0, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = q_15_strides_0, weight = var_1570_weight_0_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1570_cast_fp16")];
+            tensor<string, []> k_15_pad_type_0 = const()[name = tensor<string, []>("k_15_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_15_strides_0 = const()[name = tensor<string, []>("k_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_15_pad_0 = const()[name = tensor<string, []>("k_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_15_dilations_0 = const()[name = tensor<string, []>("k_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_15_groups_0 = const()[name = tensor<string, []>("k_15_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(106625728)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_15_cast_fp16 = conv(dilations = k_15_dilations_0, groups = k_15_groups_0, pad = k_15_pad_0, pad_type = k_15_pad_type_0, strides = k_15_strides_0, weight = blocks_7_attn_key_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")];
+            tensor<string, []> var_1568_pad_type_0 = const()[name = tensor<string, []>("op_1568_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1568_strides_0 = const()[name = tensor<string, []>("op_1568_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1568_pad_0 = const()[name = tensor<string, []>("op_1568_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1568_dilations_0 = const()[name = tensor<string, []>("op_1568_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1568_groups_0 = const()[name = tensor<string, []>("op_1568_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(107805440)))];
+            tensor<fp16, [768]> blocks_7_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108985152)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1568_cast_fp16 = conv(bias = blocks_7_attn_value_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = blocks_7_attn_value_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("op_1568_cast_fp16")];
+            tensor<int32, [12]> tile_21 = const()[name = tensor<string, []>("tile_21"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1571_axis_0 = const()[name = tensor<string, []>("op_1571_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1571_cast_fp16_11 = split(axis = var_1571_axis_0, split_sizes = tile_21, x = var_1570_cast_fp16)[name = tensor<string, []>("op_1571_cast_fp16")];
+            tensor<int32, [4]> var_1584_perm_0 = const()[name = tensor<string, []>("op_1584_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1585_axis_0 = const()[name = tensor<string, []>("op_1585_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1584_cast_fp16 = transpose(perm = var_1584_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1585_cast_fp16_11 = split(axis = var_1585_axis_0, split_sizes = tile_22, x = var_1584_cast_fp16)[name = tensor<string, []>("op_1585_cast_fp16")];
+            tensor<int32, [12]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1598_axis_0 = const()[name = tensor<string, []>("op_1598_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1598_cast_fp16_11 = split(axis = var_1598_axis_0, split_sizes = tile_23, x = var_1568_cast_fp16)[name = tensor<string, []>("op_1598_cast_fp16")];
+            tensor<string, []> aw_169_equation_0 = const()[name = tensor<string, []>("aw_169_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_169_cast_fp16 = einsum(equation = aw_169_equation_0, values = (var_1585_cast_fp16_0, var_1571_cast_fp16_0))[name = tensor<string, []>("aw_169_cast_fp16")];
+            tensor<string, []> aw_171_equation_0 = const()[name = tensor<string, []>("aw_171_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_171_cast_fp16 = einsum(equation = aw_171_equation_0, values = (var_1585_cast_fp16_1, var_1571_cast_fp16_1))[name = tensor<string, []>("aw_171_cast_fp16")];
+            tensor<string, []> aw_173_equation_0 = const()[name = tensor<string, []>("aw_173_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_173_cast_fp16 = einsum(equation = aw_173_equation_0, values = (var_1585_cast_fp16_2, var_1571_cast_fp16_2))[name = tensor<string, []>("aw_173_cast_fp16")];
+            tensor<string, []> aw_175_equation_0 = const()[name = tensor<string, []>("aw_175_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_175_cast_fp16 = einsum(equation = aw_175_equation_0, values = (var_1585_cast_fp16_3, var_1571_cast_fp16_3))[name = tensor<string, []>("aw_175_cast_fp16")];
+            tensor<string, []> aw_177_equation_0 = const()[name = tensor<string, []>("aw_177_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_177_cast_fp16 = einsum(equation = aw_177_equation_0, values = (var_1585_cast_fp16_4, var_1571_cast_fp16_4))[name = tensor<string, []>("aw_177_cast_fp16")];
+            tensor<string, []> aw_179_equation_0 = const()[name = tensor<string, []>("aw_179_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_179_cast_fp16 = einsum(equation = aw_179_equation_0, values = (var_1585_cast_fp16_5, var_1571_cast_fp16_5))[name = tensor<string, []>("aw_179_cast_fp16")];
+            tensor<string, []> aw_181_equation_0 = const()[name = tensor<string, []>("aw_181_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_181_cast_fp16 = einsum(equation = aw_181_equation_0, values = (var_1585_cast_fp16_6, var_1571_cast_fp16_6))[name = tensor<string, []>("aw_181_cast_fp16")];
+            tensor<string, []> aw_183_equation_0 = const()[name = tensor<string, []>("aw_183_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_183_cast_fp16 = einsum(equation = aw_183_equation_0, values = (var_1585_cast_fp16_7, var_1571_cast_fp16_7))[name = tensor<string, []>("aw_183_cast_fp16")];
+            tensor<string, []> aw_185_equation_0 = const()[name = tensor<string, []>("aw_185_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_185_cast_fp16 = einsum(equation = aw_185_equation_0, values = (var_1585_cast_fp16_8, var_1571_cast_fp16_8))[name = tensor<string, []>("aw_185_cast_fp16")];
+            tensor<string, []> aw_187_equation_0 = const()[name = tensor<string, []>("aw_187_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_187_cast_fp16 = einsum(equation = aw_187_equation_0, values = (var_1585_cast_fp16_9, var_1571_cast_fp16_9))[name = tensor<string, []>("aw_187_cast_fp16")];
+            tensor<string, []> aw_189_equation_0 = const()[name = tensor<string, []>("aw_189_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_189_cast_fp16 = einsum(equation = aw_189_equation_0, values = (var_1585_cast_fp16_10, var_1571_cast_fp16_10))[name = tensor<string, []>("aw_189_cast_fp16")];
+            tensor<string, []> aw_191_equation_0 = const()[name = tensor<string, []>("aw_191_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_191_cast_fp16 = einsum(equation = aw_191_equation_0, values = (var_1585_cast_fp16_11, var_1571_cast_fp16_11))[name = tensor<string, []>("aw_191_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1635_cast_fp16 = softmax(axis = var_1519, x = aw_169_cast_fp16)[name = tensor<string, []>("op_1635_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1636_cast_fp16 = softmax(axis = var_1519, x = aw_171_cast_fp16)[name = tensor<string, []>("op_1636_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1637_cast_fp16 = softmax(axis = var_1519, x = aw_173_cast_fp16)[name = tensor<string, []>("op_1637_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1638_cast_fp16 = softmax(axis = var_1519, x = aw_175_cast_fp16)[name = tensor<string, []>("op_1638_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1639_cast_fp16 = softmax(axis = var_1519, x = aw_177_cast_fp16)[name = tensor<string, []>("op_1639_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1640_cast_fp16 = softmax(axis = var_1519, x = aw_179_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1641_cast_fp16 = softmax(axis = var_1519, x = aw_181_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1642_cast_fp16 = softmax(axis = var_1519, x = aw_183_cast_fp16)[name = tensor<string, []>("op_1642_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1643_cast_fp16 = softmax(axis = var_1519, x = aw_185_cast_fp16)[name = tensor<string, []>("op_1643_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1644_cast_fp16 = softmax(axis = var_1519, x = aw_187_cast_fp16)[name = tensor<string, []>("op_1644_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1645_cast_fp16 = softmax(axis = var_1519, x = aw_189_cast_fp16)[name = tensor<string, []>("op_1645_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1646_cast_fp16 = softmax(axis = var_1519, x = aw_191_cast_fp16)[name = tensor<string, []>("op_1646_cast_fp16")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1598_cast_fp16_0, var_1635_cast_fp16))[name = tensor<string, []>("op_1648_cast_fp16")];
+            tensor<string, []> var_1650_equation_0 = const()[name = tensor<string, []>("op_1650_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1598_cast_fp16_1, var_1636_cast_fp16))[name = tensor<string, []>("op_1650_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1598_cast_fp16_2, var_1637_cast_fp16))[name = tensor<string, []>("op_1652_cast_fp16")];
+            tensor<string, []> var_1654_equation_0 = const()[name = tensor<string, []>("op_1654_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1598_cast_fp16_3, var_1638_cast_fp16))[name = tensor<string, []>("op_1654_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1598_cast_fp16_4, var_1639_cast_fp16))[name = tensor<string, []>("op_1656_cast_fp16")];
+            tensor<string, []> var_1658_equation_0 = const()[name = tensor<string, []>("op_1658_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1598_cast_fp16_5, var_1640_cast_fp16))[name = tensor<string, []>("op_1658_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1598_cast_fp16_6, var_1641_cast_fp16))[name = tensor<string, []>("op_1660_cast_fp16")];
+            tensor<string, []> var_1662_equation_0 = const()[name = tensor<string, []>("op_1662_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1598_cast_fp16_7, var_1642_cast_fp16))[name = tensor<string, []>("op_1662_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1598_cast_fp16_8, var_1643_cast_fp16))[name = tensor<string, []>("op_1664_cast_fp16")];
+            tensor<string, []> var_1666_equation_0 = const()[name = tensor<string, []>("op_1666_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1598_cast_fp16_9, var_1644_cast_fp16))[name = tensor<string, []>("op_1666_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1598_cast_fp16_10, var_1645_cast_fp16))[name = tensor<string, []>("op_1668_cast_fp16")];
+            tensor<string, []> var_1670_equation_0 = const()[name = tensor<string, []>("op_1670_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1598_cast_fp16_11, var_1646_cast_fp16))[name = tensor<string, []>("op_1670_cast_fp16")];
+            tensor<bool, []> input_75_interleave_0 = const()[name = tensor<string, []>("input_75_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_75_cast_fp16 = concat(axis = var_1519, interleave = input_75_interleave_0, values = (var_1648_cast_fp16, var_1650_cast_fp16, var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16, var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16, var_1668_cast_fp16, var_1670_cast_fp16))[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<string, []> var_1679_pad_type_0 = const()[name = tensor<string, []>("op_1679_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1679_strides_0 = const()[name = tensor<string, []>("op_1679_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1679_pad_0 = const()[name = tensor<string, []>("op_1679_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1679_dilations_0 = const()[name = tensor<string, []>("op_1679_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1679_groups_0 = const()[name = tensor<string, []>("op_1679_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_7_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(108986752)))];
+            tensor<fp16, [768]> blocks_7_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110166464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1679_cast_fp16 = conv(bias = blocks_7_attn_out_bias_to_fp16, dilations = var_1679_dilations_0, groups = var_1679_groups_0, pad = var_1679_pad_0, pad_type = var_1679_pad_type_0, strides = var_1679_strides_0, weight = blocks_7_attn_out_weight_to_fp16, x = input_75_cast_fp16)[name = tensor<string, []>("op_1679_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = var_1679_cast_fp16)[name = tensor<string, []>("inputs_31_cast_fp16")];
+            tensor<int32, [1]> input_77_axes_0 = const()[name = tensor<string, []>("input_77_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_77_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_77_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110168064)))];
+            tensor<fp16, [768]> input_77_beta_0_to_fp16 = const()[name = tensor<string, []>("input_77_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110169664)))];
+            tensor<fp16, []> var_1689_to_fp16 = const()[name = tensor<string, []>("op_1689_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_77_cast_fp16 = layer_norm(axes = input_77_axes_0, beta = input_77_beta_0_to_fp16, epsilon = var_1689_to_fp16, gamma = input_77_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<string, []> input_79_pad_type_0 = const()[name = tensor<string, []>("input_79_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_79_strides_0 = const()[name = tensor<string, []>("input_79_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_79_pad_0 = const()[name = tensor<string, []>("input_79_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_79_dilations_0 = const()[name = tensor<string, []>("input_79_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_79_groups_0 = const()[name = tensor<string, []>("input_79_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_7_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(110171264)))];
+            tensor<fp16, [3072]> blocks_7_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114889920)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_79_cast_fp16 = conv(bias = blocks_7_mlp_0_bias_to_fp16, dilations = input_79_dilations_0, groups = input_79_groups_0, pad = input_79_pad_0, pad_type = input_79_pad_type_0, strides = input_79_strides_0, weight = blocks_7_mlp_0_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<string, []> input_81_mode_0 = const()[name = tensor<string, []>("input_81_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_81_cast_fp16 = gelu(mode = input_81_mode_0, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<string, []> var_1715_pad_type_0 = const()[name = tensor<string, []>("op_1715_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1715_strides_0 = const()[name = tensor<string, []>("op_1715_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1715_pad_0 = const()[name = tensor<string, []>("op_1715_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1715_dilations_0 = const()[name = tensor<string, []>("op_1715_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1715_groups_0 = const()[name = tensor<string, []>("op_1715_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_7_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(114896128)))];
+            tensor<fp16, [768]> blocks_7_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_7_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119614784)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1715_cast_fp16 = conv(bias = blocks_7_mlp_2_bias_to_fp16, dilations = var_1715_dilations_0, groups = var_1715_groups_0, pad = var_1715_pad_0, pad_type = var_1715_pad_type_0, strides = var_1715_strides_0, weight = blocks_7_mlp_2_weight_to_fp16, x = input_81_cast_fp16)[name = tensor<string, []>("op_1715_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = var_1715_cast_fp16)[name = tensor<string, []>("inputs_33_cast_fp16")];
+            tensor<int32, []> var_1724 = const()[name = tensor<string, []>("op_1724"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_83_axes_0 = const()[name = tensor<string, []>("input_83_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_83_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_83_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119616384)))];
+            tensor<fp16, [768]> input_83_beta_0_to_fp16 = const()[name = tensor<string, []>("input_83_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119617984)))];
+            tensor<fp16, []> var_1740_to_fp16 = const()[name = tensor<string, []>("op_1740_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_1740_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<string, []> q_17_pad_type_0 = const()[name = tensor<string, []>("q_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_17_strides_0 = const()[name = tensor<string, []>("q_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_17_pad_0 = const()[name = tensor<string, []>("q_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_17_dilations_0 = const()[name = tensor<string, []>("q_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_17_groups_0 = const()[name = tensor<string, []>("q_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1775_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1775_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(119619584)))];
+            tensor<fp16, [768]> var_1775_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1775_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120799296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1775_cast_fp16 = conv(bias = var_1775_bias_0_to_fp16, dilations = q_17_dilations_0, groups = q_17_groups_0, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = q_17_strides_0, weight = var_1775_weight_0_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_1775_cast_fp16")];
+            tensor<string, []> k_17_pad_type_0 = const()[name = tensor<string, []>("k_17_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_17_strides_0 = const()[name = tensor<string, []>("k_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_17_pad_0 = const()[name = tensor<string, []>("k_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_17_dilations_0 = const()[name = tensor<string, []>("k_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_17_groups_0 = const()[name = tensor<string, []>("k_17_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(120800896)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_17_cast_fp16 = conv(dilations = k_17_dilations_0, groups = k_17_groups_0, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = k_17_strides_0, weight = blocks_8_attn_key_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")];
+            tensor<string, []> var_1773_pad_type_0 = const()[name = tensor<string, []>("op_1773_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1773_strides_0 = const()[name = tensor<string, []>("op_1773_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1773_pad_0 = const()[name = tensor<string, []>("op_1773_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1773_dilations_0 = const()[name = tensor<string, []>("op_1773_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1773_groups_0 = const()[name = tensor<string, []>("op_1773_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(121980608)))];
+            tensor<fp16, [768]> blocks_8_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123160320)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1773_cast_fp16 = conv(bias = blocks_8_attn_value_bias_to_fp16, dilations = var_1773_dilations_0, groups = var_1773_groups_0, pad = var_1773_pad_0, pad_type = var_1773_pad_type_0, strides = var_1773_strides_0, weight = blocks_8_attn_value_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("op_1773_cast_fp16")];
+            tensor<int32, [12]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1776_axis_0 = const()[name = tensor<string, []>("op_1776_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1776_cast_fp16_11 = split(axis = var_1776_axis_0, split_sizes = tile_24, x = var_1775_cast_fp16)[name = tensor<string, []>("op_1776_cast_fp16")];
+            tensor<int32, [4]> var_1789_perm_0 = const()[name = tensor<string, []>("op_1789_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_25 = const()[name = tensor<string, []>("tile_25"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1790_axis_0 = const()[name = tensor<string, []>("op_1790_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1789_cast_fp16 = transpose(perm = var_1789_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1790_cast_fp16_11 = split(axis = var_1790_axis_0, split_sizes = tile_25, x = var_1789_cast_fp16)[name = tensor<string, []>("op_1790_cast_fp16")];
+            tensor<int32, [12]> tile_26 = const()[name = tensor<string, []>("tile_26"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1803_axis_0 = const()[name = tensor<string, []>("op_1803_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1803_cast_fp16_11 = split(axis = var_1803_axis_0, split_sizes = tile_26, x = var_1773_cast_fp16)[name = tensor<string, []>("op_1803_cast_fp16")];
+            tensor<string, []> aw_193_equation_0 = const()[name = tensor<string, []>("aw_193_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_193_cast_fp16 = einsum(equation = aw_193_equation_0, values = (var_1790_cast_fp16_0, var_1776_cast_fp16_0))[name = tensor<string, []>("aw_193_cast_fp16")];
+            tensor<string, []> aw_195_equation_0 = const()[name = tensor<string, []>("aw_195_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_195_cast_fp16 = einsum(equation = aw_195_equation_0, values = (var_1790_cast_fp16_1, var_1776_cast_fp16_1))[name = tensor<string, []>("aw_195_cast_fp16")];
+            tensor<string, []> aw_197_equation_0 = const()[name = tensor<string, []>("aw_197_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_197_cast_fp16 = einsum(equation = aw_197_equation_0, values = (var_1790_cast_fp16_2, var_1776_cast_fp16_2))[name = tensor<string, []>("aw_197_cast_fp16")];
+            tensor<string, []> aw_199_equation_0 = const()[name = tensor<string, []>("aw_199_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_199_cast_fp16 = einsum(equation = aw_199_equation_0, values = (var_1790_cast_fp16_3, var_1776_cast_fp16_3))[name = tensor<string, []>("aw_199_cast_fp16")];
+            tensor<string, []> aw_201_equation_0 = const()[name = tensor<string, []>("aw_201_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_201_cast_fp16 = einsum(equation = aw_201_equation_0, values = (var_1790_cast_fp16_4, var_1776_cast_fp16_4))[name = tensor<string, []>("aw_201_cast_fp16")];
+            tensor<string, []> aw_203_equation_0 = const()[name = tensor<string, []>("aw_203_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_203_cast_fp16 = einsum(equation = aw_203_equation_0, values = (var_1790_cast_fp16_5, var_1776_cast_fp16_5))[name = tensor<string, []>("aw_203_cast_fp16")];
+            tensor<string, []> aw_205_equation_0 = const()[name = tensor<string, []>("aw_205_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_205_cast_fp16 = einsum(equation = aw_205_equation_0, values = (var_1790_cast_fp16_6, var_1776_cast_fp16_6))[name = tensor<string, []>("aw_205_cast_fp16")];
+            tensor<string, []> aw_207_equation_0 = const()[name = tensor<string, []>("aw_207_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_207_cast_fp16 = einsum(equation = aw_207_equation_0, values = (var_1790_cast_fp16_7, var_1776_cast_fp16_7))[name = tensor<string, []>("aw_207_cast_fp16")];
+            tensor<string, []> aw_209_equation_0 = const()[name = tensor<string, []>("aw_209_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_209_cast_fp16 = einsum(equation = aw_209_equation_0, values = (var_1790_cast_fp16_8, var_1776_cast_fp16_8))[name = tensor<string, []>("aw_209_cast_fp16")];
+            tensor<string, []> aw_211_equation_0 = const()[name = tensor<string, []>("aw_211_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_211_cast_fp16 = einsum(equation = aw_211_equation_0, values = (var_1790_cast_fp16_9, var_1776_cast_fp16_9))[name = tensor<string, []>("aw_211_cast_fp16")];
+            tensor<string, []> aw_213_equation_0 = const()[name = tensor<string, []>("aw_213_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_213_cast_fp16 = einsum(equation = aw_213_equation_0, values = (var_1790_cast_fp16_10, var_1776_cast_fp16_10))[name = tensor<string, []>("aw_213_cast_fp16")];
+            tensor<string, []> aw_215_equation_0 = const()[name = tensor<string, []>("aw_215_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_215_cast_fp16 = einsum(equation = aw_215_equation_0, values = (var_1790_cast_fp16_11, var_1776_cast_fp16_11))[name = tensor<string, []>("aw_215_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1840_cast_fp16 = softmax(axis = var_1724, x = aw_193_cast_fp16)[name = tensor<string, []>("op_1840_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1841_cast_fp16 = softmax(axis = var_1724, x = aw_195_cast_fp16)[name = tensor<string, []>("op_1841_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1842_cast_fp16 = softmax(axis = var_1724, x = aw_197_cast_fp16)[name = tensor<string, []>("op_1842_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1843_cast_fp16 = softmax(axis = var_1724, x = aw_199_cast_fp16)[name = tensor<string, []>("op_1843_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1844_cast_fp16 = softmax(axis = var_1724, x = aw_201_cast_fp16)[name = tensor<string, []>("op_1844_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1845_cast_fp16 = softmax(axis = var_1724, x = aw_203_cast_fp16)[name = tensor<string, []>("op_1845_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1846_cast_fp16 = softmax(axis = var_1724, x = aw_205_cast_fp16)[name = tensor<string, []>("op_1846_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1847_cast_fp16 = softmax(axis = var_1724, x = aw_207_cast_fp16)[name = tensor<string, []>("op_1847_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1848_cast_fp16 = softmax(axis = var_1724, x = aw_209_cast_fp16)[name = tensor<string, []>("op_1848_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1849_cast_fp16 = softmax(axis = var_1724, x = aw_211_cast_fp16)[name = tensor<string, []>("op_1849_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1850_cast_fp16 = softmax(axis = var_1724, x = aw_213_cast_fp16)[name = tensor<string, []>("op_1850_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_1851_cast_fp16 = softmax(axis = var_1724, x = aw_215_cast_fp16)[name = tensor<string, []>("op_1851_cast_fp16")];
+            tensor<string, []> var_1853_equation_0 = const()[name = tensor<string, []>("op_1853_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1853_cast_fp16 = einsum(equation = var_1853_equation_0, values = (var_1803_cast_fp16_0, var_1840_cast_fp16))[name = tensor<string, []>("op_1853_cast_fp16")];
+            tensor<string, []> var_1855_equation_0 = const()[name = tensor<string, []>("op_1855_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1855_cast_fp16 = einsum(equation = var_1855_equation_0, values = (var_1803_cast_fp16_1, var_1841_cast_fp16))[name = tensor<string, []>("op_1855_cast_fp16")];
+            tensor<string, []> var_1857_equation_0 = const()[name = tensor<string, []>("op_1857_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1857_cast_fp16 = einsum(equation = var_1857_equation_0, values = (var_1803_cast_fp16_2, var_1842_cast_fp16))[name = tensor<string, []>("op_1857_cast_fp16")];
+            tensor<string, []> var_1859_equation_0 = const()[name = tensor<string, []>("op_1859_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1859_cast_fp16 = einsum(equation = var_1859_equation_0, values = (var_1803_cast_fp16_3, var_1843_cast_fp16))[name = tensor<string, []>("op_1859_cast_fp16")];
+            tensor<string, []> var_1861_equation_0 = const()[name = tensor<string, []>("op_1861_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1861_cast_fp16 = einsum(equation = var_1861_equation_0, values = (var_1803_cast_fp16_4, var_1844_cast_fp16))[name = tensor<string, []>("op_1861_cast_fp16")];
+            tensor<string, []> var_1863_equation_0 = const()[name = tensor<string, []>("op_1863_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1863_cast_fp16 = einsum(equation = var_1863_equation_0, values = (var_1803_cast_fp16_5, var_1845_cast_fp16))[name = tensor<string, []>("op_1863_cast_fp16")];
+            tensor<string, []> var_1865_equation_0 = const()[name = tensor<string, []>("op_1865_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1865_cast_fp16 = einsum(equation = var_1865_equation_0, values = (var_1803_cast_fp16_6, var_1846_cast_fp16))[name = tensor<string, []>("op_1865_cast_fp16")];
+            tensor<string, []> var_1867_equation_0 = const()[name = tensor<string, []>("op_1867_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1867_cast_fp16 = einsum(equation = var_1867_equation_0, values = (var_1803_cast_fp16_7, var_1847_cast_fp16))[name = tensor<string, []>("op_1867_cast_fp16")];
+            tensor<string, []> var_1869_equation_0 = const()[name = tensor<string, []>("op_1869_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1869_cast_fp16 = einsum(equation = var_1869_equation_0, values = (var_1803_cast_fp16_8, var_1848_cast_fp16))[name = tensor<string, []>("op_1869_cast_fp16")];
+            tensor<string, []> var_1871_equation_0 = const()[name = tensor<string, []>("op_1871_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1871_cast_fp16 = einsum(equation = var_1871_equation_0, values = (var_1803_cast_fp16_9, var_1849_cast_fp16))[name = tensor<string, []>("op_1871_cast_fp16")];
+            tensor<string, []> var_1873_equation_0 = const()[name = tensor<string, []>("op_1873_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1873_cast_fp16 = einsum(equation = var_1873_equation_0, values = (var_1803_cast_fp16_10, var_1850_cast_fp16))[name = tensor<string, []>("op_1873_cast_fp16")];
+            tensor<string, []> var_1875_equation_0 = const()[name = tensor<string, []>("op_1875_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_1875_cast_fp16 = einsum(equation = var_1875_equation_0, values = (var_1803_cast_fp16_11, var_1851_cast_fp16))[name = tensor<string, []>("op_1875_cast_fp16")];
+            tensor<bool, []> input_85_interleave_0 = const()[name = tensor<string, []>("input_85_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_85_cast_fp16 = concat(axis = var_1724, interleave = input_85_interleave_0, values = (var_1853_cast_fp16, var_1855_cast_fp16, var_1857_cast_fp16, var_1859_cast_fp16, var_1861_cast_fp16, var_1863_cast_fp16, var_1865_cast_fp16, var_1867_cast_fp16, var_1869_cast_fp16, var_1871_cast_fp16, var_1873_cast_fp16, var_1875_cast_fp16))[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<string, []> var_1884_pad_type_0 = const()[name = tensor<string, []>("op_1884_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1884_strides_0 = const()[name = tensor<string, []>("op_1884_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1884_pad_0 = const()[name = tensor<string, []>("op_1884_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1884_dilations_0 = const()[name = tensor<string, []>("op_1884_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1884_groups_0 = const()[name = tensor<string, []>("op_1884_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_8_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(123161920)))];
+            tensor<fp16, [768]> blocks_8_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124341632)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1884_cast_fp16 = conv(bias = blocks_8_attn_out_bias_to_fp16, dilations = var_1884_dilations_0, groups = var_1884_groups_0, pad = var_1884_pad_0, pad_type = var_1884_pad_type_0, strides = var_1884_strides_0, weight = blocks_8_attn_out_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("op_1884_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = var_1884_cast_fp16)[name = tensor<string, []>("inputs_35_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_87_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_87_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124343232)))];
+            tensor<fp16, [768]> input_87_beta_0_to_fp16 = const()[name = tensor<string, []>("input_87_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124344832)))];
+            tensor<fp16, []> var_1894_to_fp16 = const()[name = tensor<string, []>("op_1894_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = input_87_beta_0_to_fp16, epsilon = var_1894_to_fp16, gamma = input_87_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<string, []> input_89_pad_type_0 = const()[name = tensor<string, []>("input_89_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_89_strides_0 = const()[name = tensor<string, []>("input_89_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_89_pad_0 = const()[name = tensor<string, []>("input_89_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_89_dilations_0 = const()[name = tensor<string, []>("input_89_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_89_groups_0 = const()[name = tensor<string, []>("input_89_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_8_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(124346432)))];
+            tensor<fp16, [3072]> blocks_8_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129065088)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_89_cast_fp16 = conv(bias = blocks_8_mlp_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = blocks_8_mlp_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = input_89_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<string, []> var_1920_pad_type_0 = const()[name = tensor<string, []>("op_1920_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1920_strides_0 = const()[name = tensor<string, []>("op_1920_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1920_pad_0 = const()[name = tensor<string, []>("op_1920_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1920_dilations_0 = const()[name = tensor<string, []>("op_1920_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1920_groups_0 = const()[name = tensor<string, []>("op_1920_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_8_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(129071296)))];
+            tensor<fp16, [768]> blocks_8_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_8_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133789952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1920_cast_fp16 = conv(bias = blocks_8_mlp_2_bias_to_fp16, dilations = var_1920_dilations_0, groups = var_1920_groups_0, pad = var_1920_pad_0, pad_type = var_1920_pad_type_0, strides = var_1920_strides_0, weight = blocks_8_mlp_2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("op_1920_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = var_1920_cast_fp16)[name = tensor<string, []>("inputs_37_cast_fp16")];
+            tensor<int32, []> var_1929 = const()[name = tensor<string, []>("op_1929"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_93_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_93_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133791552)))];
+            tensor<fp16, [768]> input_93_beta_0_to_fp16 = const()[name = tensor<string, []>("input_93_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133793152)))];
+            tensor<fp16, []> var_1945_to_fp16 = const()[name = tensor<string, []>("op_1945_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, beta = input_93_beta_0_to_fp16, epsilon = var_1945_to_fp16, gamma = input_93_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
+            tensor<string, []> q_19_pad_type_0 = const()[name = tensor<string, []>("q_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_19_strides_0 = const()[name = tensor<string, []>("q_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_19_pad_0 = const()[name = tensor<string, []>("q_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_19_dilations_0 = const()[name = tensor<string, []>("q_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_19_groups_0 = const()[name = tensor<string, []>("q_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_1980_weight_0_to_fp16 = const()[name = tensor<string, []>("op_1980_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(133794752)))];
+            tensor<fp16, [768]> var_1980_bias_0_to_fp16 = const()[name = tensor<string, []>("op_1980_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134974464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1980_cast_fp16 = conv(bias = var_1980_bias_0_to_fp16, dilations = q_19_dilations_0, groups = q_19_groups_0, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = q_19_strides_0, weight = var_1980_weight_0_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_1980_cast_fp16")];
+            tensor<string, []> k_19_pad_type_0 = const()[name = tensor<string, []>("k_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_19_strides_0 = const()[name = tensor<string, []>("k_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_19_pad_0 = const()[name = tensor<string, []>("k_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_19_dilations_0 = const()[name = tensor<string, []>("k_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_19_groups_0 = const()[name = tensor<string, []>("k_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(134976064)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_19_cast_fp16 = conv(dilations = k_19_dilations_0, groups = k_19_groups_0, pad = k_19_pad_0, pad_type = k_19_pad_type_0, strides = k_19_strides_0, weight = blocks_9_attn_key_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")];
+            tensor<string, []> var_1978_pad_type_0 = const()[name = tensor<string, []>("op_1978_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_1978_strides_0 = const()[name = tensor<string, []>("op_1978_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1978_pad_0 = const()[name = tensor<string, []>("op_1978_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1978_dilations_0 = const()[name = tensor<string, []>("op_1978_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_1978_groups_0 = const()[name = tensor<string, []>("op_1978_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(136155776)))];
+            tensor<fp16, [768]> blocks_9_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137335488)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1978_cast_fp16 = conv(bias = blocks_9_attn_value_bias_to_fp16, dilations = var_1978_dilations_0, groups = var_1978_groups_0, pad = var_1978_pad_0, pad_type = var_1978_pad_type_0, strides = var_1978_strides_0, weight = blocks_9_attn_value_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<int32, [12]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1981_axis_0 = const()[name = tensor<string, []>("op_1981_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_1981_cast_fp16_11 = split(axis = var_1981_axis_0, split_sizes = tile_27, x = var_1980_cast_fp16)[name = tensor<string, []>("op_1981_cast_fp16")];
+            tensor<int32, [4]> var_1994_perm_0 = const()[name = tensor<string, []>("op_1994_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_1995_axis_0 = const()[name = tensor<string, []>("op_1995_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_1994_cast_fp16 = transpose(perm = var_1994_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_1995_cast_fp16_11 = split(axis = var_1995_axis_0, split_sizes = tile_28, x = var_1994_cast_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
+            tensor<int32, [12]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2008_axis_0 = const()[name = tensor<string, []>("op_2008_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2008_cast_fp16_11 = split(axis = var_2008_axis_0, split_sizes = tile_29, x = var_1978_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
+            tensor<string, []> aw_217_equation_0 = const()[name = tensor<string, []>("aw_217_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_217_cast_fp16 = einsum(equation = aw_217_equation_0, values = (var_1995_cast_fp16_0, var_1981_cast_fp16_0))[name = tensor<string, []>("aw_217_cast_fp16")];
+            tensor<string, []> aw_219_equation_0 = const()[name = tensor<string, []>("aw_219_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_219_cast_fp16 = einsum(equation = aw_219_equation_0, values = (var_1995_cast_fp16_1, var_1981_cast_fp16_1))[name = tensor<string, []>("aw_219_cast_fp16")];
+            tensor<string, []> aw_221_equation_0 = const()[name = tensor<string, []>("aw_221_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_221_cast_fp16 = einsum(equation = aw_221_equation_0, values = (var_1995_cast_fp16_2, var_1981_cast_fp16_2))[name = tensor<string, []>("aw_221_cast_fp16")];
+            tensor<string, []> aw_223_equation_0 = const()[name = tensor<string, []>("aw_223_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_223_cast_fp16 = einsum(equation = aw_223_equation_0, values = (var_1995_cast_fp16_3, var_1981_cast_fp16_3))[name = tensor<string, []>("aw_223_cast_fp16")];
+            tensor<string, []> aw_225_equation_0 = const()[name = tensor<string, []>("aw_225_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_225_cast_fp16 = einsum(equation = aw_225_equation_0, values = (var_1995_cast_fp16_4, var_1981_cast_fp16_4))[name = tensor<string, []>("aw_225_cast_fp16")];
+            tensor<string, []> aw_227_equation_0 = const()[name = tensor<string, []>("aw_227_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_227_cast_fp16 = einsum(equation = aw_227_equation_0, values = (var_1995_cast_fp16_5, var_1981_cast_fp16_5))[name = tensor<string, []>("aw_227_cast_fp16")];
+            tensor<string, []> aw_229_equation_0 = const()[name = tensor<string, []>("aw_229_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_229_cast_fp16 = einsum(equation = aw_229_equation_0, values = (var_1995_cast_fp16_6, var_1981_cast_fp16_6))[name = tensor<string, []>("aw_229_cast_fp16")];
+            tensor<string, []> aw_231_equation_0 = const()[name = tensor<string, []>("aw_231_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_231_cast_fp16 = einsum(equation = aw_231_equation_0, values = (var_1995_cast_fp16_7, var_1981_cast_fp16_7))[name = tensor<string, []>("aw_231_cast_fp16")];
+            tensor<string, []> aw_233_equation_0 = const()[name = tensor<string, []>("aw_233_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_233_cast_fp16 = einsum(equation = aw_233_equation_0, values = (var_1995_cast_fp16_8, var_1981_cast_fp16_8))[name = tensor<string, []>("aw_233_cast_fp16")];
+            tensor<string, []> aw_235_equation_0 = const()[name = tensor<string, []>("aw_235_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_235_cast_fp16 = einsum(equation = aw_235_equation_0, values = (var_1995_cast_fp16_9, var_1981_cast_fp16_9))[name = tensor<string, []>("aw_235_cast_fp16")];
+            tensor<string, []> aw_237_equation_0 = const()[name = tensor<string, []>("aw_237_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_237_cast_fp16 = einsum(equation = aw_237_equation_0, values = (var_1995_cast_fp16_10, var_1981_cast_fp16_10))[name = tensor<string, []>("aw_237_cast_fp16")];
+            tensor<string, []> aw_239_equation_0 = const()[name = tensor<string, []>("aw_239_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_239_cast_fp16 = einsum(equation = aw_239_equation_0, values = (var_1995_cast_fp16_11, var_1981_cast_fp16_11))[name = tensor<string, []>("aw_239_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2045_cast_fp16 = softmax(axis = var_1929, x = aw_217_cast_fp16)[name = tensor<string, []>("op_2045_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2046_cast_fp16 = softmax(axis = var_1929, x = aw_219_cast_fp16)[name = tensor<string, []>("op_2046_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2047_cast_fp16 = softmax(axis = var_1929, x = aw_221_cast_fp16)[name = tensor<string, []>("op_2047_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2048_cast_fp16 = softmax(axis = var_1929, x = aw_223_cast_fp16)[name = tensor<string, []>("op_2048_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2049_cast_fp16 = softmax(axis = var_1929, x = aw_225_cast_fp16)[name = tensor<string, []>("op_2049_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2050_cast_fp16 = softmax(axis = var_1929, x = aw_227_cast_fp16)[name = tensor<string, []>("op_2050_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2051_cast_fp16 = softmax(axis = var_1929, x = aw_229_cast_fp16)[name = tensor<string, []>("op_2051_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2052_cast_fp16 = softmax(axis = var_1929, x = aw_231_cast_fp16)[name = tensor<string, []>("op_2052_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2053_cast_fp16 = softmax(axis = var_1929, x = aw_233_cast_fp16)[name = tensor<string, []>("op_2053_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2054_cast_fp16 = softmax(axis = var_1929, x = aw_235_cast_fp16)[name = tensor<string, []>("op_2054_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2055_cast_fp16 = softmax(axis = var_1929, x = aw_237_cast_fp16)[name = tensor<string, []>("op_2055_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2056_cast_fp16 = softmax(axis = var_1929, x = aw_239_cast_fp16)[name = tensor<string, []>("op_2056_cast_fp16")];
+            tensor<string, []> var_2058_equation_0 = const()[name = tensor<string, []>("op_2058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2058_cast_fp16 = einsum(equation = var_2058_equation_0, values = (var_2008_cast_fp16_0, var_2045_cast_fp16))[name = tensor<string, []>("op_2058_cast_fp16")];
+            tensor<string, []> var_2060_equation_0 = const()[name = tensor<string, []>("op_2060_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2060_cast_fp16 = einsum(equation = var_2060_equation_0, values = (var_2008_cast_fp16_1, var_2046_cast_fp16))[name = tensor<string, []>("op_2060_cast_fp16")];
+            tensor<string, []> var_2062_equation_0 = const()[name = tensor<string, []>("op_2062_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2062_cast_fp16 = einsum(equation = var_2062_equation_0, values = (var_2008_cast_fp16_2, var_2047_cast_fp16))[name = tensor<string, []>("op_2062_cast_fp16")];
+            tensor<string, []> var_2064_equation_0 = const()[name = tensor<string, []>("op_2064_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2064_cast_fp16 = einsum(equation = var_2064_equation_0, values = (var_2008_cast_fp16_3, var_2048_cast_fp16))[name = tensor<string, []>("op_2064_cast_fp16")];
+            tensor<string, []> var_2066_equation_0 = const()[name = tensor<string, []>("op_2066_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2066_cast_fp16 = einsum(equation = var_2066_equation_0, values = (var_2008_cast_fp16_4, var_2049_cast_fp16))[name = tensor<string, []>("op_2066_cast_fp16")];
+            tensor<string, []> var_2068_equation_0 = const()[name = tensor<string, []>("op_2068_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2068_cast_fp16 = einsum(equation = var_2068_equation_0, values = (var_2008_cast_fp16_5, var_2050_cast_fp16))[name = tensor<string, []>("op_2068_cast_fp16")];
+            tensor<string, []> var_2070_equation_0 = const()[name = tensor<string, []>("op_2070_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2070_cast_fp16 = einsum(equation = var_2070_equation_0, values = (var_2008_cast_fp16_6, var_2051_cast_fp16))[name = tensor<string, []>("op_2070_cast_fp16")];
+            tensor<string, []> var_2072_equation_0 = const()[name = tensor<string, []>("op_2072_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2072_cast_fp16 = einsum(equation = var_2072_equation_0, values = (var_2008_cast_fp16_7, var_2052_cast_fp16))[name = tensor<string, []>("op_2072_cast_fp16")];
+            tensor<string, []> var_2074_equation_0 = const()[name = tensor<string, []>("op_2074_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2074_cast_fp16 = einsum(equation = var_2074_equation_0, values = (var_2008_cast_fp16_8, var_2053_cast_fp16))[name = tensor<string, []>("op_2074_cast_fp16")];
+            tensor<string, []> var_2076_equation_0 = const()[name = tensor<string, []>("op_2076_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2076_cast_fp16 = einsum(equation = var_2076_equation_0, values = (var_2008_cast_fp16_9, var_2054_cast_fp16))[name = tensor<string, []>("op_2076_cast_fp16")];
+            tensor<string, []> var_2078_equation_0 = const()[name = tensor<string, []>("op_2078_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2078_cast_fp16 = einsum(equation = var_2078_equation_0, values = (var_2008_cast_fp16_10, var_2055_cast_fp16))[name = tensor<string, []>("op_2078_cast_fp16")];
+            tensor<string, []> var_2080_equation_0 = const()[name = tensor<string, []>("op_2080_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2080_cast_fp16 = einsum(equation = var_2080_equation_0, values = (var_2008_cast_fp16_11, var_2056_cast_fp16))[name = tensor<string, []>("op_2080_cast_fp16")];
+            tensor<bool, []> input_95_interleave_0 = const()[name = tensor<string, []>("input_95_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_95_cast_fp16 = concat(axis = var_1929, interleave = input_95_interleave_0, values = (var_2058_cast_fp16, var_2060_cast_fp16, var_2062_cast_fp16, var_2064_cast_fp16, var_2066_cast_fp16, var_2068_cast_fp16, var_2070_cast_fp16, var_2072_cast_fp16, var_2074_cast_fp16, var_2076_cast_fp16, var_2078_cast_fp16, var_2080_cast_fp16))[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<string, []> var_2089_pad_type_0 = const()[name = tensor<string, []>("op_2089_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2089_strides_0 = const()[name = tensor<string, []>("op_2089_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2089_pad_0 = const()[name = tensor<string, []>("op_2089_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2089_dilations_0 = const()[name = tensor<string, []>("op_2089_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2089_groups_0 = const()[name = tensor<string, []>("op_2089_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_9_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(137337088)))];
+            tensor<fp16, [768]> blocks_9_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138516800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2089_cast_fp16 = conv(bias = blocks_9_attn_out_bias_to_fp16, dilations = var_2089_dilations_0, groups = var_2089_groups_0, pad = var_2089_pad_0, pad_type = var_2089_pad_type_0, strides = var_2089_strides_0, weight = blocks_9_attn_out_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("op_2089_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = var_2089_cast_fp16)[name = tensor<string, []>("inputs_39_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = tensor<string, []>("input_97_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_97_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_97_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138518400)))];
+            tensor<fp16, [768]> input_97_beta_0_to_fp16 = const()[name = tensor<string, []>("input_97_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138520000)))];
+            tensor<fp16, []> var_2099_to_fp16 = const()[name = tensor<string, []>("op_2099_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = input_97_beta_0_to_fp16, epsilon = var_2099_to_fp16, gamma = input_97_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<string, []> input_99_pad_type_0 = const()[name = tensor<string, []>("input_99_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_99_strides_0 = const()[name = tensor<string, []>("input_99_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_99_pad_0 = const()[name = tensor<string, []>("input_99_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_99_dilations_0 = const()[name = tensor<string, []>("input_99_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_99_groups_0 = const()[name = tensor<string, []>("input_99_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_9_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(138521600)))];
+            tensor<fp16, [3072]> blocks_9_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143240256)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_99_cast_fp16 = conv(bias = blocks_9_mlp_0_bias_to_fp16, dilations = input_99_dilations_0, groups = input_99_groups_0, pad = input_99_pad_0, pad_type = input_99_pad_type_0, strides = input_99_strides_0, weight = blocks_9_mlp_0_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<string, []> input_101_mode_0 = const()[name = tensor<string, []>("input_101_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = input_99_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<string, []> var_2125_pad_type_0 = const()[name = tensor<string, []>("op_2125_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2125_strides_0 = const()[name = tensor<string, []>("op_2125_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2125_pad_0 = const()[name = tensor<string, []>("op_2125_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2125_dilations_0 = const()[name = tensor<string, []>("op_2125_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2125_groups_0 = const()[name = tensor<string, []>("op_2125_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_9_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(143246464)))];
+            tensor<fp16, [768]> blocks_9_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_9_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147965120)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2125_cast_fp16 = conv(bias = blocks_9_mlp_2_bias_to_fp16, dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = blocks_9_mlp_2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("op_2125_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = var_2125_cast_fp16)[name = tensor<string, []>("inputs_41_cast_fp16")];
+            tensor<int32, []> var_2134 = const()[name = tensor<string, []>("op_2134"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_103_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_103_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147966720)))];
+            tensor<fp16, [768]> input_103_beta_0_to_fp16 = const()[name = tensor<string, []>("input_103_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147968320)))];
+            tensor<fp16, []> var_2150_to_fp16 = const()[name = tensor<string, []>("op_2150_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, beta = input_103_beta_0_to_fp16, epsilon = var_2150_to_fp16, gamma = input_103_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<string, []> q_21_pad_type_0 = const()[name = tensor<string, []>("q_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_21_strides_0 = const()[name = tensor<string, []>("q_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_21_pad_0 = const()[name = tensor<string, []>("q_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_21_dilations_0 = const()[name = tensor<string, []>("q_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_21_groups_0 = const()[name = tensor<string, []>("q_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_2185_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2185_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(147969920)))];
+            tensor<fp16, [768]> var_2185_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2185_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149149632)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2185_cast_fp16 = conv(bias = var_2185_bias_0_to_fp16, dilations = q_21_dilations_0, groups = q_21_groups_0, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = q_21_strides_0, weight = var_2185_weight_0_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2185_cast_fp16")];
+            tensor<string, []> k_21_pad_type_0 = const()[name = tensor<string, []>("k_21_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_21_strides_0 = const()[name = tensor<string, []>("k_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_21_pad_0 = const()[name = tensor<string, []>("k_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_21_dilations_0 = const()[name = tensor<string, []>("k_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_21_groups_0 = const()[name = tensor<string, []>("k_21_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(149151232)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_21_cast_fp16 = conv(dilations = k_21_dilations_0, groups = k_21_groups_0, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = k_21_strides_0, weight = blocks_10_attn_key_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")];
+            tensor<string, []> var_2183_pad_type_0 = const()[name = tensor<string, []>("op_2183_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2183_strides_0 = const()[name = tensor<string, []>("op_2183_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2183_pad_0 = const()[name = tensor<string, []>("op_2183_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2183_dilations_0 = const()[name = tensor<string, []>("op_2183_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2183_groups_0 = const()[name = tensor<string, []>("op_2183_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(150330944)))];
+            tensor<fp16, [768]> blocks_10_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151510656)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2183_cast_fp16 = conv(bias = blocks_10_attn_value_bias_to_fp16, dilations = var_2183_dilations_0, groups = var_2183_groups_0, pad = var_2183_pad_0, pad_type = var_2183_pad_type_0, strides = var_2183_strides_0, weight = blocks_10_attn_value_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("op_2183_cast_fp16")];
+            tensor<int32, [12]> tile_30 = const()[name = tensor<string, []>("tile_30"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2186_axis_0 = const()[name = tensor<string, []>("op_2186_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2186_cast_fp16_11 = split(axis = var_2186_axis_0, split_sizes = tile_30, x = var_2185_cast_fp16)[name = tensor<string, []>("op_2186_cast_fp16")];
+            tensor<int32, [4]> var_2199_perm_0 = const()[name = tensor<string, []>("op_2199_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_31 = const()[name = tensor<string, []>("tile_31"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2200_axis_0 = const()[name = tensor<string, []>("op_2200_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_2199_cast_fp16 = transpose(perm = var_2199_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2200_cast_fp16_11 = split(axis = var_2200_axis_0, split_sizes = tile_31, x = var_2199_cast_fp16)[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<int32, [12]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2213_axis_0 = const()[name = tensor<string, []>("op_2213_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2213_cast_fp16_11 = split(axis = var_2213_axis_0, split_sizes = tile_32, x = var_2183_cast_fp16)[name = tensor<string, []>("op_2213_cast_fp16")];
+            tensor<string, []> aw_241_equation_0 = const()[name = tensor<string, []>("aw_241_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_241_cast_fp16 = einsum(equation = aw_241_equation_0, values = (var_2200_cast_fp16_0, var_2186_cast_fp16_0))[name = tensor<string, []>("aw_241_cast_fp16")];
+            tensor<string, []> aw_243_equation_0 = const()[name = tensor<string, []>("aw_243_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_243_cast_fp16 = einsum(equation = aw_243_equation_0, values = (var_2200_cast_fp16_1, var_2186_cast_fp16_1))[name = tensor<string, []>("aw_243_cast_fp16")];
+            tensor<string, []> aw_245_equation_0 = const()[name = tensor<string, []>("aw_245_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_245_cast_fp16 = einsum(equation = aw_245_equation_0, values = (var_2200_cast_fp16_2, var_2186_cast_fp16_2))[name = tensor<string, []>("aw_245_cast_fp16")];
+            tensor<string, []> aw_247_equation_0 = const()[name = tensor<string, []>("aw_247_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_247_cast_fp16 = einsum(equation = aw_247_equation_0, values = (var_2200_cast_fp16_3, var_2186_cast_fp16_3))[name = tensor<string, []>("aw_247_cast_fp16")];
+            tensor<string, []> aw_249_equation_0 = const()[name = tensor<string, []>("aw_249_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_249_cast_fp16 = einsum(equation = aw_249_equation_0, values = (var_2200_cast_fp16_4, var_2186_cast_fp16_4))[name = tensor<string, []>("aw_249_cast_fp16")];
+            tensor<string, []> aw_251_equation_0 = const()[name = tensor<string, []>("aw_251_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_251_cast_fp16 = einsum(equation = aw_251_equation_0, values = (var_2200_cast_fp16_5, var_2186_cast_fp16_5))[name = tensor<string, []>("aw_251_cast_fp16")];
+            tensor<string, []> aw_253_equation_0 = const()[name = tensor<string, []>("aw_253_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_253_cast_fp16 = einsum(equation = aw_253_equation_0, values = (var_2200_cast_fp16_6, var_2186_cast_fp16_6))[name = tensor<string, []>("aw_253_cast_fp16")];
+            tensor<string, []> aw_255_equation_0 = const()[name = tensor<string, []>("aw_255_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_255_cast_fp16 = einsum(equation = aw_255_equation_0, values = (var_2200_cast_fp16_7, var_2186_cast_fp16_7))[name = tensor<string, []>("aw_255_cast_fp16")];
+            tensor<string, []> aw_257_equation_0 = const()[name = tensor<string, []>("aw_257_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_257_cast_fp16 = einsum(equation = aw_257_equation_0, values = (var_2200_cast_fp16_8, var_2186_cast_fp16_8))[name = tensor<string, []>("aw_257_cast_fp16")];
+            tensor<string, []> aw_259_equation_0 = const()[name = tensor<string, []>("aw_259_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_259_cast_fp16 = einsum(equation = aw_259_equation_0, values = (var_2200_cast_fp16_9, var_2186_cast_fp16_9))[name = tensor<string, []>("aw_259_cast_fp16")];
+            tensor<string, []> aw_261_equation_0 = const()[name = tensor<string, []>("aw_261_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_261_cast_fp16 = einsum(equation = aw_261_equation_0, values = (var_2200_cast_fp16_10, var_2186_cast_fp16_10))[name = tensor<string, []>("aw_261_cast_fp16")];
+            tensor<string, []> aw_263_equation_0 = const()[name = tensor<string, []>("aw_263_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_263_cast_fp16 = einsum(equation = aw_263_equation_0, values = (var_2200_cast_fp16_11, var_2186_cast_fp16_11))[name = tensor<string, []>("aw_263_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2250_cast_fp16 = softmax(axis = var_2134, x = aw_241_cast_fp16)[name = tensor<string, []>("op_2250_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2251_cast_fp16 = softmax(axis = var_2134, x = aw_243_cast_fp16)[name = tensor<string, []>("op_2251_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2252_cast_fp16 = softmax(axis = var_2134, x = aw_245_cast_fp16)[name = tensor<string, []>("op_2252_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2253_cast_fp16 = softmax(axis = var_2134, x = aw_247_cast_fp16)[name = tensor<string, []>("op_2253_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2254_cast_fp16 = softmax(axis = var_2134, x = aw_249_cast_fp16)[name = tensor<string, []>("op_2254_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2255_cast_fp16 = softmax(axis = var_2134, x = aw_251_cast_fp16)[name = tensor<string, []>("op_2255_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2256_cast_fp16 = softmax(axis = var_2134, x = aw_253_cast_fp16)[name = tensor<string, []>("op_2256_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2257_cast_fp16 = softmax(axis = var_2134, x = aw_255_cast_fp16)[name = tensor<string, []>("op_2257_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2258_cast_fp16 = softmax(axis = var_2134, x = aw_257_cast_fp16)[name = tensor<string, []>("op_2258_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2259_cast_fp16 = softmax(axis = var_2134, x = aw_259_cast_fp16)[name = tensor<string, []>("op_2259_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2260_cast_fp16 = softmax(axis = var_2134, x = aw_261_cast_fp16)[name = tensor<string, []>("op_2260_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2261_cast_fp16 = softmax(axis = var_2134, x = aw_263_cast_fp16)[name = tensor<string, []>("op_2261_cast_fp16")];
+            tensor<string, []> var_2263_equation_0 = const()[name = tensor<string, []>("op_2263_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2263_cast_fp16 = einsum(equation = var_2263_equation_0, values = (var_2213_cast_fp16_0, var_2250_cast_fp16))[name = tensor<string, []>("op_2263_cast_fp16")];
+            tensor<string, []> var_2265_equation_0 = const()[name = tensor<string, []>("op_2265_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2265_cast_fp16 = einsum(equation = var_2265_equation_0, values = (var_2213_cast_fp16_1, var_2251_cast_fp16))[name = tensor<string, []>("op_2265_cast_fp16")];
+            tensor<string, []> var_2267_equation_0 = const()[name = tensor<string, []>("op_2267_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2267_cast_fp16 = einsum(equation = var_2267_equation_0, values = (var_2213_cast_fp16_2, var_2252_cast_fp16))[name = tensor<string, []>("op_2267_cast_fp16")];
+            tensor<string, []> var_2269_equation_0 = const()[name = tensor<string, []>("op_2269_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2269_cast_fp16 = einsum(equation = var_2269_equation_0, values = (var_2213_cast_fp16_3, var_2253_cast_fp16))[name = tensor<string, []>("op_2269_cast_fp16")];
+            tensor<string, []> var_2271_equation_0 = const()[name = tensor<string, []>("op_2271_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2271_cast_fp16 = einsum(equation = var_2271_equation_0, values = (var_2213_cast_fp16_4, var_2254_cast_fp16))[name = tensor<string, []>("op_2271_cast_fp16")];
+            tensor<string, []> var_2273_equation_0 = const()[name = tensor<string, []>("op_2273_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2273_cast_fp16 = einsum(equation = var_2273_equation_0, values = (var_2213_cast_fp16_5, var_2255_cast_fp16))[name = tensor<string, []>("op_2273_cast_fp16")];
+            tensor<string, []> var_2275_equation_0 = const()[name = tensor<string, []>("op_2275_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2275_cast_fp16 = einsum(equation = var_2275_equation_0, values = (var_2213_cast_fp16_6, var_2256_cast_fp16))[name = tensor<string, []>("op_2275_cast_fp16")];
+            tensor<string, []> var_2277_equation_0 = const()[name = tensor<string, []>("op_2277_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2277_cast_fp16 = einsum(equation = var_2277_equation_0, values = (var_2213_cast_fp16_7, var_2257_cast_fp16))[name = tensor<string, []>("op_2277_cast_fp16")];
+            tensor<string, []> var_2279_equation_0 = const()[name = tensor<string, []>("op_2279_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2279_cast_fp16 = einsum(equation = var_2279_equation_0, values = (var_2213_cast_fp16_8, var_2258_cast_fp16))[name = tensor<string, []>("op_2279_cast_fp16")];
+            tensor<string, []> var_2281_equation_0 = const()[name = tensor<string, []>("op_2281_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2281_cast_fp16 = einsum(equation = var_2281_equation_0, values = (var_2213_cast_fp16_9, var_2259_cast_fp16))[name = tensor<string, []>("op_2281_cast_fp16")];
+            tensor<string, []> var_2283_equation_0 = const()[name = tensor<string, []>("op_2283_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2283_cast_fp16 = einsum(equation = var_2283_equation_0, values = (var_2213_cast_fp16_10, var_2260_cast_fp16))[name = tensor<string, []>("op_2283_cast_fp16")];
+            tensor<string, []> var_2285_equation_0 = const()[name = tensor<string, []>("op_2285_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2285_cast_fp16 = einsum(equation = var_2285_equation_0, values = (var_2213_cast_fp16_11, var_2261_cast_fp16))[name = tensor<string, []>("op_2285_cast_fp16")];
+            tensor<bool, []> input_105_interleave_0 = const()[name = tensor<string, []>("input_105_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_105_cast_fp16 = concat(axis = var_2134, interleave = input_105_interleave_0, values = (var_2263_cast_fp16, var_2265_cast_fp16, var_2267_cast_fp16, var_2269_cast_fp16, var_2271_cast_fp16, var_2273_cast_fp16, var_2275_cast_fp16, var_2277_cast_fp16, var_2279_cast_fp16, var_2281_cast_fp16, var_2283_cast_fp16, var_2285_cast_fp16))[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<string, []> var_2294_pad_type_0 = const()[name = tensor<string, []>("op_2294_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2294_strides_0 = const()[name = tensor<string, []>("op_2294_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2294_pad_0 = const()[name = tensor<string, []>("op_2294_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2294_dilations_0 = const()[name = tensor<string, []>("op_2294_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2294_groups_0 = const()[name = tensor<string, []>("op_2294_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_10_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(151512256)))];
+            tensor<fp16, [768]> blocks_10_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152691968)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2294_cast_fp16 = conv(bias = blocks_10_attn_out_bias_to_fp16, dilations = var_2294_dilations_0, groups = var_2294_groups_0, pad = var_2294_pad_0, pad_type = var_2294_pad_type_0, strides = var_2294_strides_0, weight = blocks_10_attn_out_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("op_2294_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = var_2294_cast_fp16)[name = tensor<string, []>("inputs_43_cast_fp16")];
+            tensor<int32, [1]> input_107_axes_0 = const()[name = tensor<string, []>("input_107_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_107_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_107_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152693568)))];
+            tensor<fp16, [768]> input_107_beta_0_to_fp16 = const()[name = tensor<string, []>("input_107_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152695168)))];
+            tensor<fp16, []> var_2304_to_fp16 = const()[name = tensor<string, []>("op_2304_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_107_cast_fp16 = layer_norm(axes = input_107_axes_0, beta = input_107_beta_0_to_fp16, epsilon = var_2304_to_fp16, gamma = input_107_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<string, []> input_109_pad_type_0 = const()[name = tensor<string, []>("input_109_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_109_strides_0 = const()[name = tensor<string, []>("input_109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_109_pad_0 = const()[name = tensor<string, []>("input_109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_109_dilations_0 = const()[name = tensor<string, []>("input_109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_109_groups_0 = const()[name = tensor<string, []>("input_109_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_10_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(152696768)))];
+            tensor<fp16, [3072]> blocks_10_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157415424)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_109_cast_fp16 = conv(bias = blocks_10_mlp_0_bias_to_fp16, dilations = input_109_dilations_0, groups = input_109_groups_0, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = input_109_strides_0, weight = blocks_10_mlp_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<string, []> input_111_mode_0 = const()[name = tensor<string, []>("input_111_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<string, []> var_2330_pad_type_0 = const()[name = tensor<string, []>("op_2330_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2330_strides_0 = const()[name = tensor<string, []>("op_2330_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2330_pad_0 = const()[name = tensor<string, []>("op_2330_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2330_dilations_0 = const()[name = tensor<string, []>("op_2330_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2330_groups_0 = const()[name = tensor<string, []>("op_2330_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_10_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(157421632)))];
+            tensor<fp16, [768]> blocks_10_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_10_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162140288)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2330_cast_fp16 = conv(bias = blocks_10_mlp_2_bias_to_fp16, dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = blocks_10_mlp_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor<string, []>("op_2330_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = var_2330_cast_fp16)[name = tensor<string, []>("inputs_45_cast_fp16")];
+            tensor<int32, []> var_2339 = const()[name = tensor<string, []>("op_2339"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_113_axes_0 = const()[name = tensor<string, []>("input_113_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_113_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_113_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162141888)))];
+            tensor<fp16, [768]> input_113_beta_0_to_fp16 = const()[name = tensor<string, []>("input_113_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162143488)))];
+            tensor<fp16, []> var_2355_to_fp16 = const()[name = tensor<string, []>("op_2355_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_113_cast_fp16 = layer_norm(axes = input_113_axes_0, beta = input_113_beta_0_to_fp16, epsilon = var_2355_to_fp16, gamma = input_113_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> var_2390_weight_0_to_fp16 = const()[name = tensor<string, []>("op_2390_weight_0_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(162145088)))];
+            tensor<fp16, [768]> var_2390_bias_0_to_fp16 = const()[name = tensor<string, []>("op_2390_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163324800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2390_cast_fp16 = conv(bias = var_2390_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_2390_weight_0_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2390_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_key_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(163326400)))];
+            tensor<fp16, [1, 768, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_11_attn_key_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_2388_pad_type_0 = const()[name = tensor<string, []>("op_2388_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2388_strides_0 = const()[name = tensor<string, []>("op_2388_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2388_pad_0 = const()[name = tensor<string, []>("op_2388_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2388_dilations_0 = const()[name = tensor<string, []>("op_2388_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2388_groups_0 = const()[name = tensor<string, []>("op_2388_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(164506112)))];
+            tensor<fp16, [768]> blocks_11_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165685824)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2388_cast_fp16 = conv(bias = blocks_11_attn_value_bias_to_fp16, dilations = var_2388_dilations_0, groups = var_2388_groups_0, pad = var_2388_pad_0, pad_type = var_2388_pad_type_0, strides = var_2388_strides_0, weight = blocks_11_attn_value_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("op_2388_cast_fp16")];
+            tensor<int32, [12]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2391_axis_0 = const()[name = tensor<string, []>("op_2391_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2391_cast_fp16_11 = split(axis = var_2391_axis_0, split_sizes = tile_33, x = var_2390_cast_fp16)[name = tensor<string, []>("op_2391_cast_fp16")];
+            tensor<int32, [4]> var_2404_perm_0 = const()[name = tensor<string, []>("op_2404_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2405_axis_0 = const()[name = tensor<string, []>("op_2405_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 768]> var_2404_cast_fp16 = transpose(perm = var_2404_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_5, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_6, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_7, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_8, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_9, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_10, tensor<fp16, [1, 1500, 1, 64]> var_2405_cast_fp16_11 = split(axis = var_2405_axis_0, split_sizes = tile_34, x = var_2404_cast_fp16)[name = tensor<string, []>("op_2405_cast_fp16")];
+            tensor<int32, [12]> tile_35 = const()[name = tensor<string, []>("tile_35"), val = tensor<int32, [12]>([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_2418_axis_0 = const()[name = tensor<string, []>("op_2418_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_5, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_6, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_7, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_8, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_9, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_10, tensor<fp16, [1, 64, 1, 1500]> var_2418_cast_fp16_11 = split(axis = var_2418_axis_0, split_sizes = tile_35, x = var_2388_cast_fp16)[name = tensor<string, []>("op_2418_cast_fp16")];
+            tensor<string, []> aw_265_equation_0 = const()[name = tensor<string, []>("aw_265_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_265_cast_fp16 = einsum(equation = aw_265_equation_0, values = (var_2405_cast_fp16_0, var_2391_cast_fp16_0))[name = tensor<string, []>("aw_265_cast_fp16")];
+            tensor<string, []> aw_267_equation_0 = const()[name = tensor<string, []>("aw_267_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_267_cast_fp16 = einsum(equation = aw_267_equation_0, values = (var_2405_cast_fp16_1, var_2391_cast_fp16_1))[name = tensor<string, []>("aw_267_cast_fp16")];
+            tensor<string, []> aw_269_equation_0 = const()[name = tensor<string, []>("aw_269_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_269_cast_fp16 = einsum(equation = aw_269_equation_0, values = (var_2405_cast_fp16_2, var_2391_cast_fp16_2))[name = tensor<string, []>("aw_269_cast_fp16")];
+            tensor<string, []> aw_271_equation_0 = const()[name = tensor<string, []>("aw_271_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_271_cast_fp16 = einsum(equation = aw_271_equation_0, values = (var_2405_cast_fp16_3, var_2391_cast_fp16_3))[name = tensor<string, []>("aw_271_cast_fp16")];
+            tensor<string, []> aw_273_equation_0 = const()[name = tensor<string, []>("aw_273_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_273_cast_fp16 = einsum(equation = aw_273_equation_0, values = (var_2405_cast_fp16_4, var_2391_cast_fp16_4))[name = tensor<string, []>("aw_273_cast_fp16")];
+            tensor<string, []> aw_275_equation_0 = const()[name = tensor<string, []>("aw_275_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_275_cast_fp16 = einsum(equation = aw_275_equation_0, values = (var_2405_cast_fp16_5, var_2391_cast_fp16_5))[name = tensor<string, []>("aw_275_cast_fp16")];
+            tensor<string, []> aw_277_equation_0 = const()[name = tensor<string, []>("aw_277_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_277_cast_fp16 = einsum(equation = aw_277_equation_0, values = (var_2405_cast_fp16_6, var_2391_cast_fp16_6))[name = tensor<string, []>("aw_277_cast_fp16")];
+            tensor<string, []> aw_279_equation_0 = const()[name = tensor<string, []>("aw_279_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_279_cast_fp16 = einsum(equation = aw_279_equation_0, values = (var_2405_cast_fp16_7, var_2391_cast_fp16_7))[name = tensor<string, []>("aw_279_cast_fp16")];
+            tensor<string, []> aw_281_equation_0 = const()[name = tensor<string, []>("aw_281_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_281_cast_fp16 = einsum(equation = aw_281_equation_0, values = (var_2405_cast_fp16_8, var_2391_cast_fp16_8))[name = tensor<string, []>("aw_281_cast_fp16")];
+            tensor<string, []> aw_283_equation_0 = const()[name = tensor<string, []>("aw_283_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_283_cast_fp16 = einsum(equation = aw_283_equation_0, values = (var_2405_cast_fp16_9, var_2391_cast_fp16_9))[name = tensor<string, []>("aw_283_cast_fp16")];
+            tensor<string, []> aw_285_equation_0 = const()[name = tensor<string, []>("aw_285_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_285_cast_fp16 = einsum(equation = aw_285_equation_0, values = (var_2405_cast_fp16_10, var_2391_cast_fp16_10))[name = tensor<string, []>("aw_285_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_2405_cast_fp16_11, var_2391_cast_fp16_11))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2455_cast_fp16 = softmax(axis = var_2339, x = aw_265_cast_fp16)[name = tensor<string, []>("op_2455_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2456_cast_fp16 = softmax(axis = var_2339, x = aw_267_cast_fp16)[name = tensor<string, []>("op_2456_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2457_cast_fp16 = softmax(axis = var_2339, x = aw_269_cast_fp16)[name = tensor<string, []>("op_2457_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2458_cast_fp16 = softmax(axis = var_2339, x = aw_271_cast_fp16)[name = tensor<string, []>("op_2458_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2459_cast_fp16 = softmax(axis = var_2339, x = aw_273_cast_fp16)[name = tensor<string, []>("op_2459_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2460_cast_fp16 = softmax(axis = var_2339, x = aw_275_cast_fp16)[name = tensor<string, []>("op_2460_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2461_cast_fp16 = softmax(axis = var_2339, x = aw_277_cast_fp16)[name = tensor<string, []>("op_2461_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2462_cast_fp16 = softmax(axis = var_2339, x = aw_279_cast_fp16)[name = tensor<string, []>("op_2462_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2463_cast_fp16 = softmax(axis = var_2339, x = aw_281_cast_fp16)[name = tensor<string, []>("op_2463_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2464_cast_fp16 = softmax(axis = var_2339, x = aw_283_cast_fp16)[name = tensor<string, []>("op_2464_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2465_cast_fp16 = softmax(axis = var_2339, x = aw_285_cast_fp16)[name = tensor<string, []>("op_2465_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_2466_cast_fp16 = softmax(axis = var_2339, x = aw_cast_fp16)[name = tensor<string, []>("op_2466_cast_fp16")];
+            tensor<string, []> var_2468_equation_0 = const()[name = tensor<string, []>("op_2468_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2418_cast_fp16_0, var_2455_cast_fp16))[name = tensor<string, []>("op_2468_cast_fp16")];
+            tensor<string, []> var_2470_equation_0 = const()[name = tensor<string, []>("op_2470_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2418_cast_fp16_1, var_2456_cast_fp16))[name = tensor<string, []>("op_2470_cast_fp16")];
+            tensor<string, []> var_2472_equation_0 = const()[name = tensor<string, []>("op_2472_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2418_cast_fp16_2, var_2457_cast_fp16))[name = tensor<string, []>("op_2472_cast_fp16")];
+            tensor<string, []> var_2474_equation_0 = const()[name = tensor<string, []>("op_2474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2418_cast_fp16_3, var_2458_cast_fp16))[name = tensor<string, []>("op_2474_cast_fp16")];
+            tensor<string, []> var_2476_equation_0 = const()[name = tensor<string, []>("op_2476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2418_cast_fp16_4, var_2459_cast_fp16))[name = tensor<string, []>("op_2476_cast_fp16")];
+            tensor<string, []> var_2478_equation_0 = const()[name = tensor<string, []>("op_2478_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2418_cast_fp16_5, var_2460_cast_fp16))[name = tensor<string, []>("op_2478_cast_fp16")];
+            tensor<string, []> var_2480_equation_0 = const()[name = tensor<string, []>("op_2480_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2418_cast_fp16_6, var_2461_cast_fp16))[name = tensor<string, []>("op_2480_cast_fp16")];
+            tensor<string, []> var_2482_equation_0 = const()[name = tensor<string, []>("op_2482_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2482_cast_fp16 = einsum(equation = var_2482_equation_0, values = (var_2418_cast_fp16_7, var_2462_cast_fp16))[name = tensor<string, []>("op_2482_cast_fp16")];
+            tensor<string, []> var_2484_equation_0 = const()[name = tensor<string, []>("op_2484_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2484_cast_fp16 = einsum(equation = var_2484_equation_0, values = (var_2418_cast_fp16_8, var_2463_cast_fp16))[name = tensor<string, []>("op_2484_cast_fp16")];
+            tensor<string, []> var_2486_equation_0 = const()[name = tensor<string, []>("op_2486_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2486_cast_fp16 = einsum(equation = var_2486_equation_0, values = (var_2418_cast_fp16_9, var_2464_cast_fp16))[name = tensor<string, []>("op_2486_cast_fp16")];
+            tensor<string, []> var_2488_equation_0 = const()[name = tensor<string, []>("op_2488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2488_cast_fp16 = einsum(equation = var_2488_equation_0, values = (var_2418_cast_fp16_10, var_2465_cast_fp16))[name = tensor<string, []>("op_2488_cast_fp16")];
+            tensor<string, []> var_2490_equation_0 = const()[name = tensor<string, []>("op_2490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_2490_cast_fp16 = einsum(equation = var_2490_equation_0, values = (var_2418_cast_fp16_11, var_2466_cast_fp16))[name = tensor<string, []>("op_2490_cast_fp16")];
+            tensor<bool, []> input_115_interleave_0 = const()[name = tensor<string, []>("input_115_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 768, 1, 1500]> input_115_cast_fp16 = concat(axis = var_2339, interleave = input_115_interleave_0, values = (var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16, var_2480_cast_fp16, var_2482_cast_fp16, var_2484_cast_fp16, var_2486_cast_fp16, var_2488_cast_fp16, var_2490_cast_fp16))[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<string, []> var_2499_pad_type_0 = const()[name = tensor<string, []>("op_2499_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2499_strides_0 = const()[name = tensor<string, []>("op_2499_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2499_pad_0 = const()[name = tensor<string, []>("op_2499_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2499_dilations_0 = const()[name = tensor<string, []>("op_2499_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2499_groups_0 = const()[name = tensor<string, []>("op_2499_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 768, 1, 1]> blocks_11_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_weight_to_fp16"), val = tensor<fp16, [768, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(165687424)))];
+            tensor<fp16, [768]> blocks_11_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_attn_out_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166867136)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2499_cast_fp16 = conv(bias = blocks_11_attn_out_bias_to_fp16, dilations = var_2499_dilations_0, groups = var_2499_groups_0, pad = var_2499_pad_0, pad_type = var_2499_pad_type_0, strides = var_2499_strides_0, weight = blocks_11_attn_out_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("op_2499_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = var_2499_cast_fp16)[name = tensor<string, []>("inputs_47_cast_fp16")];
+            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> input_117_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_117_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166868736)))];
+            tensor<fp16, [768]> input_117_beta_0_to_fp16 = const()[name = tensor<string, []>("input_117_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166870336)))];
+            tensor<fp16, []> var_2509_to_fp16 = const()[name = tensor<string, []>("op_2509_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, beta = input_117_beta_0_to_fp16, epsilon = var_2509_to_fp16, gamma = input_117_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<string, []> input_119_pad_type_0 = const()[name = tensor<string, []>("input_119_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_119_strides_0 = const()[name = tensor<string, []>("input_119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_119_pad_0 = const()[name = tensor<string, []>("input_119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_119_dilations_0 = const()[name = tensor<string, []>("input_119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_119_groups_0 = const()[name = tensor<string, []>("input_119_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [3072, 768, 1, 1]> blocks_11_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_weight_to_fp16"), val = tensor<fp16, [3072, 768, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(166871936)))];
+            tensor<fp16, [3072]> blocks_11_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_0_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171590592)))];
+            tensor<fp16, [1, 3072, 1, 1500]> input_119_cast_fp16 = conv(bias = blocks_11_mlp_0_bias_to_fp16, dilations = input_119_dilations_0, groups = input_119_groups_0, pad = input_119_pad_0, pad_type = input_119_pad_type_0, strides = input_119_strides_0, weight = blocks_11_mlp_0_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_119_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_2535_pad_type_0 = const()[name = tensor<string, []>("op_2535_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_2535_strides_0 = const()[name = tensor<string, []>("op_2535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2535_pad_0 = const()[name = tensor<string, []>("op_2535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2535_dilations_0 = const()[name = tensor<string, []>("op_2535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_2535_groups_0 = const()[name = tensor<string, []>("op_2535_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [768, 3072, 1, 1]> blocks_11_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_weight_to_fp16"), val = tensor<fp16, [768, 3072, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(171596800)))];
+            tensor<fp16, [768]> blocks_11_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_11_mlp_2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176315456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2535_cast_fp16 = conv(bias = blocks_11_mlp_2_bias_to_fp16, dilations = var_2535_dilations_0, groups = var_2535_groups_0, pad = var_2535_pad_0, pad_type = var_2535_pad_type_0, strides = var_2535_strides_0, weight = blocks_11_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_2535_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = var_2535_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [768]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176317056)))];
+            tensor<fp16, [768]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(176318656)))];
+            tensor<fp16, []> var_2549_to_fp16 = const()[name = tensor<string, []>("op_2549_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_2549_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_2560_axes_0 = const()[name = tensor<string, []>("op_2560_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1500]> var_2560_cast_fp16 = squeeze(axes = var_2560_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_2560_cast_fp16")];
+            tensor<int32, [3]> var_2563_perm_0 = const()[name = tensor<string, []>("op_2563_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_2563_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_2563_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 768]> var_2563_cast_fp16 = transpose(perm = var_2563_perm_0, x = var_2560_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 768]> output = cast(dtype = var_2563_cast_fp16_to_fp32_dtype_0, x = var_2563_cast_fp16)[name = tensor<string, []>("cast_51")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/small/ggml-small-encoder.mlmodelc/weights/weight.bin b/small/ggml-small-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d8790e3bd8bd730f26d3cc409f1739d02e2c09e5
--- /dev/null
+++ b/small/ggml-small-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd99222f556ad6a561a7286c4dc94d14fee3d2b60bdeb4e838bfed437f501b1e
+size 176320256
diff --git a/small/ggml-small.bin b/small/ggml-small.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bc4c4d96b528654fb067e2b243bdc438cfaf0072
--- /dev/null
+++ b/small/ggml-small.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b
+size 487601967
diff --git a/tiny.en/.DS_Store b/tiny.en/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..60a0ba4058482b15937c2750a7ce25181c753fdc
Binary files /dev/null and b/tiny.en/.DS_Store differ
diff --git a/tiny.en/ggml-tiny.en-encoder.mlmodelc/analytics/coremldata.bin b/tiny.en/ggml-tiny.en-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fb65fe66c88d9f34cbfc6d72c42eee5a375a471f
--- /dev/null
+++ b/tiny.en/ggml-tiny.en-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c811ab131cbe7fef6230b32b61cb04cc99fb5990e5fb70ab5d7ec907a4a124b2
+size 243
diff --git a/tiny.en/ggml-tiny.en-encoder.mlmodelc/coremldata.bin b/tiny.en/ggml-tiny.en-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5335642d44c0b6e3110bafc93373a63a26ae73d5
--- /dev/null
+++ b/tiny.en/ggml-tiny.en-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93254f2adaadd6535328df3b1200d4922c2f701f84df2c9b37c9a5df9ae4f4b7
+size 320
diff --git a/tiny.en/ggml-tiny.en-encoder.mlmodelc/metadata.json b/tiny.en/ggml-tiny.en-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3ba041cf378d66de0a2f58c407a1f810a237045
--- /dev/null
+++ b/tiny.en/ggml-tiny.en-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 384]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 4,
+      "Gelu" : 6,
+      "LayerNorm" : 9,
+      "Transpose" : 5,
+      "Softmax" : 24,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 9,
+      "Einsum" : 48,
+      "ExpandDims" : 1,
+      "Split" : 12,
+      "Conv" : 26
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_tiny_en",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/tiny.en/ggml-tiny.en-encoder.mlmodelc/model.mil b/tiny.en/ggml-tiny.en-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..705ff46e2edf3b81a1e15e29babaefdf0ed49ab9
--- /dev/null
+++ b/tiny.en/ggml-tiny.en-encoder.mlmodelc/model.mil
@@ -0,0 +1,463 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_28_pad_type_0 = const()[name = tensor<string, []>("op_28_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_28_pad_0 = const()[name = tensor<string, []>("op_28_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_28_strides_0 = const()[name = tensor<string, []>("op_28_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_28_dilations_0 = const()[name = tensor<string, []>("op_28_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_28_groups_0 = const()[name = tensor<string, []>("op_28_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [384, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [384, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [384]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184448)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_20")];
+            tensor<fp16, [1, 384, 3000]> var_28_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_28_dilations_0, groups = var_28_groups_0, pad = var_28_pad_0, pad_type = var_28_pad_type_0, strides = var_28_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_28_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_46_pad_type_0 = const()[name = tensor<string, []>("op_46_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_46_pad_0 = const()[name = tensor<string, []>("op_46_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_46_strides_0 = const()[name = tensor<string, []>("op_46_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_46_dilations_0 = const()[name = tensor<string, []>("op_46_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_46_groups_0 = const()[name = tensor<string, []>("op_46_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [384, 384, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185280)))];
+            tensor<fp16, [384]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070080)))];
+            tensor<fp16, [1, 384, 1500]> var_46_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_46_dilations_0, groups = var_46_groups_0, pad = var_46_pad_0, pad_type = var_46_pad_type_0, strides = var_46_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_46_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_46_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [384, 1500]> var_51_to_fp16 = const()[name = tensor<string, []>("op_51_to_fp16"), val = tensor<fp16, [384, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070912)))];
+            tensor<fp16, [1, 384, 1500]> var_53_cast_fp16 = add(x = x_3_cast_fp16, y = var_51_to_fp16)[name = tensor<string, []>("op_53_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_53_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_68 = const()[name = tensor<string, []>("op_68"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2222976)))];
+            tensor<fp16, [384]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2223808)))];
+            tensor<fp16, []> var_84_to_fp16 = const()[name = tensor<string, []>("op_84_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_84_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_119_weight_0_to_fp16 = const()[name = tensor<string, []>("op_119_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2224640)))];
+            tensor<fp16, [384]> var_119_bias_0_to_fp16 = const()[name = tensor<string, []>("op_119_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2519616)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_119_cast_fp16 = conv(bias = var_119_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_119_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_119_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2520448)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_117_pad_type_0 = const()[name = tensor<string, []>("op_117_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_117_strides_0 = const()[name = tensor<string, []>("op_117_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_117_pad_0 = const()[name = tensor<string, []>("op_117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_117_dilations_0 = const()[name = tensor<string, []>("op_117_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_117_groups_0 = const()[name = tensor<string, []>("op_117_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2815424)))];
+            tensor<fp16, [384]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3110400)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_117_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_117_dilations_0, groups = var_117_groups_0, pad = var_117_pad_0, pad_type = var_117_pad_type_0, strides = var_117_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_117_cast_fp16")];
+            tensor<int32, [6]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_120_axis_0 = const()[name = tensor<string, []>("op_120_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_5 = split(axis = var_120_axis_0, split_sizes = tile_0, x = var_119_cast_fp16)[name = tensor<string, []>("op_120_cast_fp16")];
+            tensor<int32, [4]> var_127_perm_0 = const()[name = tensor<string, []>("op_127_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_128_axis_0 = const()[name = tensor<string, []>("op_128_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_127_cast_fp16 = transpose(perm = var_127_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_5 = split(axis = var_128_axis_0, split_sizes = tile_1, x = var_127_cast_fp16)[name = tensor<string, []>("op_128_cast_fp16")];
+            tensor<int32, [6]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_135_axis_0 = const()[name = tensor<string, []>("op_135_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_5 = split(axis = var_135_axis_0, split_sizes = tile_2, x = var_117_cast_fp16)[name = tensor<string, []>("op_135_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_128_cast_fp16_0, var_120_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_128_cast_fp16_1, var_120_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_128_cast_fp16_2, var_120_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_128_cast_fp16_3, var_120_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_128_cast_fp16_4, var_120_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_128_cast_fp16_5, var_120_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_154_cast_fp16 = softmax(axis = var_68, x = aw_1_cast_fp16)[name = tensor<string, []>("op_154_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_155_cast_fp16 = softmax(axis = var_68, x = aw_3_cast_fp16)[name = tensor<string, []>("op_155_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_156_cast_fp16 = softmax(axis = var_68, x = aw_5_cast_fp16)[name = tensor<string, []>("op_156_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_157_cast_fp16 = softmax(axis = var_68, x = aw_7_cast_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_158_cast_fp16 = softmax(axis = var_68, x = aw_9_cast_fp16)[name = tensor<string, []>("op_158_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_159_cast_fp16 = softmax(axis = var_68, x = aw_11_cast_fp16)[name = tensor<string, []>("op_159_cast_fp16")];
+            tensor<string, []> var_161_equation_0 = const()[name = tensor<string, []>("op_161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_161_cast_fp16 = einsum(equation = var_161_equation_0, values = (var_135_cast_fp16_0, var_154_cast_fp16))[name = tensor<string, []>("op_161_cast_fp16")];
+            tensor<string, []> var_163_equation_0 = const()[name = tensor<string, []>("op_163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16 = einsum(equation = var_163_equation_0, values = (var_135_cast_fp16_1, var_155_cast_fp16))[name = tensor<string, []>("op_163_cast_fp16")];
+            tensor<string, []> var_165_equation_0 = const()[name = tensor<string, []>("op_165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_165_cast_fp16 = einsum(equation = var_165_equation_0, values = (var_135_cast_fp16_2, var_156_cast_fp16))[name = tensor<string, []>("op_165_cast_fp16")];
+            tensor<string, []> var_167_equation_0 = const()[name = tensor<string, []>("op_167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_167_cast_fp16 = einsum(equation = var_167_equation_0, values = (var_135_cast_fp16_3, var_157_cast_fp16))[name = tensor<string, []>("op_167_cast_fp16")];
+            tensor<string, []> var_169_equation_0 = const()[name = tensor<string, []>("op_169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_169_cast_fp16 = einsum(equation = var_169_equation_0, values = (var_135_cast_fp16_4, var_158_cast_fp16))[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<string, []> var_171_equation_0 = const()[name = tensor<string, []>("op_171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_171_cast_fp16 = einsum(equation = var_171_equation_0, values = (var_135_cast_fp16_5, var_159_cast_fp16))[name = tensor<string, []>("op_171_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_5_cast_fp16 = concat(axis = var_68, interleave = input_5_interleave_0, values = (var_161_cast_fp16, var_163_cast_fp16, var_165_cast_fp16, var_167_cast_fp16, var_169_cast_fp16, var_171_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_180_pad_type_0 = const()[name = tensor<string, []>("op_180_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_180_strides_0 = const()[name = tensor<string, []>("op_180_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_180_pad_0 = const()[name = tensor<string, []>("op_180_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_180_dilations_0 = const()[name = tensor<string, []>("op_180_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_180_groups_0 = const()[name = tensor<string, []>("op_180_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3111232)))];
+            tensor<fp16, [384]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3406208)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_180_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_180_dilations_0, groups = var_180_groups_0, pad = var_180_pad_0, pad_type = var_180_pad_type_0, strides = var_180_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_180_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_180_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407040)))];
+            tensor<fp16, [384]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407872)))];
+            tensor<fp16, []> var_190_to_fp16 = const()[name = tensor<string, []>("op_190_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_190_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3408704)))];
+            tensor<fp16, [1536]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4588416)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_216_pad_type_0 = const()[name = tensor<string, []>("op_216_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_216_strides_0 = const()[name = tensor<string, []>("op_216_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_216_pad_0 = const()[name = tensor<string, []>("op_216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_216_dilations_0 = const()[name = tensor<string, []>("op_216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_216_groups_0 = const()[name = tensor<string, []>("op_216_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4591552)))];
+            tensor<fp16, [384]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5771264)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_216_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_216_dilations_0, groups = var_216_groups_0, pad = var_216_pad_0, pad_type = var_216_pad_type_0, strides = var_216_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_216_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_216_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_225 = const()[name = tensor<string, []>("op_225"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772096)))];
+            tensor<fp16, [384]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772928)))];
+            tensor<fp16, []> var_241_to_fp16 = const()[name = tensor<string, []>("op_241_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_241_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_276_weight_0_to_fp16 = const()[name = tensor<string, []>("op_276_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5773760)))];
+            tensor<fp16, [384]> var_276_bias_0_to_fp16 = const()[name = tensor<string, []>("op_276_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6068736)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_276_cast_fp16 = conv(bias = var_276_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_276_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_276_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6069568)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_274_pad_type_0 = const()[name = tensor<string, []>("op_274_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_274_strides_0 = const()[name = tensor<string, []>("op_274_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_274_pad_0 = const()[name = tensor<string, []>("op_274_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_274_dilations_0 = const()[name = tensor<string, []>("op_274_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_274_groups_0 = const()[name = tensor<string, []>("op_274_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6364544)))];
+            tensor<fp16, [384]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6659520)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_274_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_274_dilations_0, groups = var_274_groups_0, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_274_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_274_cast_fp16")];
+            tensor<int32, [6]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_277_axis_0 = const()[name = tensor<string, []>("op_277_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_5 = split(axis = var_277_axis_0, split_sizes = tile_3, x = var_276_cast_fp16)[name = tensor<string, []>("op_277_cast_fp16")];
+            tensor<int32, [4]> var_284_perm_0 = const()[name = tensor<string, []>("op_284_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_285_axis_0 = const()[name = tensor<string, []>("op_285_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_284_cast_fp16 = transpose(perm = var_284_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_5 = split(axis = var_285_axis_0, split_sizes = tile_4, x = var_284_cast_fp16)[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<int32, [6]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_292_axis_0 = const()[name = tensor<string, []>("op_292_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_5 = split(axis = var_292_axis_0, split_sizes = tile_5, x = var_274_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_285_cast_fp16_0, var_277_cast_fp16_0))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_285_cast_fp16_1, var_277_cast_fp16_1))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_285_cast_fp16_2, var_277_cast_fp16_2))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_285_cast_fp16_3, var_277_cast_fp16_3))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_285_cast_fp16_4, var_277_cast_fp16_4))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_285_cast_fp16_5, var_277_cast_fp16_5))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_311_cast_fp16 = softmax(axis = var_225, x = aw_13_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_312_cast_fp16 = softmax(axis = var_225, x = aw_15_cast_fp16)[name = tensor<string, []>("op_312_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_313_cast_fp16 = softmax(axis = var_225, x = aw_17_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_314_cast_fp16 = softmax(axis = var_225, x = aw_19_cast_fp16)[name = tensor<string, []>("op_314_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_315_cast_fp16 = softmax(axis = var_225, x = aw_21_cast_fp16)[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_316_cast_fp16 = softmax(axis = var_225, x = aw_23_cast_fp16)[name = tensor<string, []>("op_316_cast_fp16")];
+            tensor<string, []> var_318_equation_0 = const()[name = tensor<string, []>("op_318_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_318_cast_fp16 = einsum(equation = var_318_equation_0, values = (var_292_cast_fp16_0, var_311_cast_fp16))[name = tensor<string, []>("op_318_cast_fp16")];
+            tensor<string, []> var_320_equation_0 = const()[name = tensor<string, []>("op_320_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_320_cast_fp16 = einsum(equation = var_320_equation_0, values = (var_292_cast_fp16_1, var_312_cast_fp16))[name = tensor<string, []>("op_320_cast_fp16")];
+            tensor<string, []> var_322_equation_0 = const()[name = tensor<string, []>("op_322_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_322_cast_fp16 = einsum(equation = var_322_equation_0, values = (var_292_cast_fp16_2, var_313_cast_fp16))[name = tensor<string, []>("op_322_cast_fp16")];
+            tensor<string, []> var_324_equation_0 = const()[name = tensor<string, []>("op_324_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_324_cast_fp16 = einsum(equation = var_324_equation_0, values = (var_292_cast_fp16_3, var_314_cast_fp16))[name = tensor<string, []>("op_324_cast_fp16")];
+            tensor<string, []> var_326_equation_0 = const()[name = tensor<string, []>("op_326_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_326_cast_fp16 = einsum(equation = var_326_equation_0, values = (var_292_cast_fp16_4, var_315_cast_fp16))[name = tensor<string, []>("op_326_cast_fp16")];
+            tensor<string, []> var_328_equation_0 = const()[name = tensor<string, []>("op_328_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_328_cast_fp16 = einsum(equation = var_328_equation_0, values = (var_292_cast_fp16_5, var_316_cast_fp16))[name = tensor<string, []>("op_328_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_15_cast_fp16 = concat(axis = var_225, interleave = input_15_interleave_0, values = (var_318_cast_fp16, var_320_cast_fp16, var_322_cast_fp16, var_324_cast_fp16, var_326_cast_fp16, var_328_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_337_pad_type_0 = const()[name = tensor<string, []>("op_337_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_337_strides_0 = const()[name = tensor<string, []>("op_337_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_337_pad_0 = const()[name = tensor<string, []>("op_337_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_337_dilations_0 = const()[name = tensor<string, []>("op_337_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_337_groups_0 = const()[name = tensor<string, []>("op_337_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6660352)))];
+            tensor<fp16, [384]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6955328)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_337_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_337_dilations_0, groups = var_337_groups_0, pad = var_337_pad_0, pad_type = var_337_pad_type_0, strides = var_337_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_337_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956160)))];
+            tensor<fp16, [384]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956992)))];
+            tensor<fp16, []> var_347_to_fp16 = const()[name = tensor<string, []>("op_347_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_347_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6957824)))];
+            tensor<fp16, [1536]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8137536)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_373_pad_type_0 = const()[name = tensor<string, []>("op_373_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = tensor<string, []>("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = tensor<string, []>("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = tensor<string, []>("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_373_groups_0 = const()[name = tensor<string, []>("op_373_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8140672)))];
+            tensor<fp16, [384]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9320384)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_373_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_373_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_382 = const()[name = tensor<string, []>("op_382"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9321216)))];
+            tensor<fp16, [384]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322048)))];
+            tensor<fp16, []> var_398_to_fp16 = const()[name = tensor<string, []>("op_398_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_398_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_433_weight_0_to_fp16 = const()[name = tensor<string, []>("op_433_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322880)))];
+            tensor<fp16, [384]> var_433_bias_0_to_fp16 = const()[name = tensor<string, []>("op_433_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9617856)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_433_cast_fp16 = conv(bias = var_433_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_433_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_433_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9618688)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_431_pad_type_0 = const()[name = tensor<string, []>("op_431_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_431_strides_0 = const()[name = tensor<string, []>("op_431_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_431_pad_0 = const()[name = tensor<string, []>("op_431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_431_dilations_0 = const()[name = tensor<string, []>("op_431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_431_groups_0 = const()[name = tensor<string, []>("op_431_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9913664)))];
+            tensor<fp16, [384]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10208640)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_431_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_431_dilations_0, groups = var_431_groups_0, pad = var_431_pad_0, pad_type = var_431_pad_type_0, strides = var_431_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_431_cast_fp16")];
+            tensor<int32, [6]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_434_axis_0 = const()[name = tensor<string, []>("op_434_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_5 = split(axis = var_434_axis_0, split_sizes = tile_6, x = var_433_cast_fp16)[name = tensor<string, []>("op_434_cast_fp16")];
+            tensor<int32, [4]> var_441_perm_0 = const()[name = tensor<string, []>("op_441_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_442_axis_0 = const()[name = tensor<string, []>("op_442_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_441_cast_fp16 = transpose(perm = var_441_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_5 = split(axis = var_442_axis_0, split_sizes = tile_7, x = var_441_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
+            tensor<int32, [6]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_449_axis_0 = const()[name = tensor<string, []>("op_449_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_5 = split(axis = var_449_axis_0, split_sizes = tile_8, x = var_431_cast_fp16)[name = tensor<string, []>("op_449_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_442_cast_fp16_0, var_434_cast_fp16_0))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_442_cast_fp16_1, var_434_cast_fp16_1))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_442_cast_fp16_2, var_434_cast_fp16_2))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_442_cast_fp16_3, var_434_cast_fp16_3))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_442_cast_fp16_4, var_434_cast_fp16_4))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_442_cast_fp16_5, var_434_cast_fp16_5))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_468_cast_fp16 = softmax(axis = var_382, x = aw_25_cast_fp16)[name = tensor<string, []>("op_468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_469_cast_fp16 = softmax(axis = var_382, x = aw_27_cast_fp16)[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_470_cast_fp16 = softmax(axis = var_382, x = aw_29_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_471_cast_fp16 = softmax(axis = var_382, x = aw_31_cast_fp16)[name = tensor<string, []>("op_471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_472_cast_fp16 = softmax(axis = var_382, x = aw_33_cast_fp16)[name = tensor<string, []>("op_472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_473_cast_fp16 = softmax(axis = var_382, x = aw_35_cast_fp16)[name = tensor<string, []>("op_473_cast_fp16")];
+            tensor<string, []> var_475_equation_0 = const()[name = tensor<string, []>("op_475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_475_cast_fp16 = einsum(equation = var_475_equation_0, values = (var_449_cast_fp16_0, var_468_cast_fp16))[name = tensor<string, []>("op_475_cast_fp16")];
+            tensor<string, []> var_477_equation_0 = const()[name = tensor<string, []>("op_477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_477_cast_fp16 = einsum(equation = var_477_equation_0, values = (var_449_cast_fp16_1, var_469_cast_fp16))[name = tensor<string, []>("op_477_cast_fp16")];
+            tensor<string, []> var_479_equation_0 = const()[name = tensor<string, []>("op_479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_449_cast_fp16_2, var_470_cast_fp16))[name = tensor<string, []>("op_479_cast_fp16")];
+            tensor<string, []> var_481_equation_0 = const()[name = tensor<string, []>("op_481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_481_cast_fp16 = einsum(equation = var_481_equation_0, values = (var_449_cast_fp16_3, var_471_cast_fp16))[name = tensor<string, []>("op_481_cast_fp16")];
+            tensor<string, []> var_483_equation_0 = const()[name = tensor<string, []>("op_483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_449_cast_fp16_4, var_472_cast_fp16))[name = tensor<string, []>("op_483_cast_fp16")];
+            tensor<string, []> var_485_equation_0 = const()[name = tensor<string, []>("op_485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_485_cast_fp16 = einsum(equation = var_485_equation_0, values = (var_449_cast_fp16_5, var_473_cast_fp16))[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_25_cast_fp16 = concat(axis = var_382, interleave = input_25_interleave_0, values = (var_475_cast_fp16, var_477_cast_fp16, var_479_cast_fp16, var_481_cast_fp16, var_483_cast_fp16, var_485_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_494_pad_type_0 = const()[name = tensor<string, []>("op_494_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_494_strides_0 = const()[name = tensor<string, []>("op_494_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_494_pad_0 = const()[name = tensor<string, []>("op_494_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_494_dilations_0 = const()[name = tensor<string, []>("op_494_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_494_groups_0 = const()[name = tensor<string, []>("op_494_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10209472)))];
+            tensor<fp16, [384]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10504448)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_494_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_494_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10505280)))];
+            tensor<fp16, [384]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506112)))];
+            tensor<fp16, []> var_504_to_fp16 = const()[name = tensor<string, []>("op_504_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_504_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506944)))];
+            tensor<fp16, [1536]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11686656)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_530_pad_type_0 = const()[name = tensor<string, []>("op_530_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_530_strides_0 = const()[name = tensor<string, []>("op_530_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_530_pad_0 = const()[name = tensor<string, []>("op_530_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_530_dilations_0 = const()[name = tensor<string, []>("op_530_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_530_groups_0 = const()[name = tensor<string, []>("op_530_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11689792)))];
+            tensor<fp16, [384]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12869504)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_530_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_530_dilations_0, groups = var_530_groups_0, pad = var_530_pad_0, pad_type = var_530_pad_type_0, strides = var_530_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_530_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12870336)))];
+            tensor<fp16, [384]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12871168)))];
+            tensor<fp16, []> var_555_to_fp16 = const()[name = tensor<string, []>("op_555_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_555_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_590_weight_0_to_fp16 = const()[name = tensor<string, []>("op_590_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872000)))];
+            tensor<fp16, [384]> var_590_bias_0_to_fp16 = const()[name = tensor<string, []>("op_590_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13166976)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_590_cast_fp16 = conv(bias = var_590_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_590_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13167808)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_588_pad_type_0 = const()[name = tensor<string, []>("op_588_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_588_strides_0 = const()[name = tensor<string, []>("op_588_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_588_pad_0 = const()[name = tensor<string, []>("op_588_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_588_dilations_0 = const()[name = tensor<string, []>("op_588_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_588_groups_0 = const()[name = tensor<string, []>("op_588_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13462784)))];
+            tensor<fp16, [384]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13757760)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_588_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<int32, [6]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_591_axis_0 = const()[name = tensor<string, []>("op_591_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_5 = split(axis = var_591_axis_0, split_sizes = tile_9, x = var_590_cast_fp16)[name = tensor<string, []>("op_591_cast_fp16")];
+            tensor<int32, [4]> var_598_perm_0 = const()[name = tensor<string, []>("op_598_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_599_axis_0 = const()[name = tensor<string, []>("op_599_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_598_cast_fp16 = transpose(perm = var_598_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_5 = split(axis = var_599_axis_0, split_sizes = tile_10, x = var_598_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<int32, [6]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_606_axis_0 = const()[name = tensor<string, []>("op_606_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_5 = split(axis = var_606_axis_0, split_sizes = tile_11, x = var_588_cast_fp16)[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_599_cast_fp16_0, var_591_cast_fp16_0))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_599_cast_fp16_1, var_591_cast_fp16_1))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_599_cast_fp16_2, var_591_cast_fp16_2))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_599_cast_fp16_3, var_591_cast_fp16_3))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_599_cast_fp16_4, var_591_cast_fp16_4))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_599_cast_fp16_5, var_591_cast_fp16_5))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_625_cast_fp16 = softmax(axis = var_539, x = aw_37_cast_fp16)[name = tensor<string, []>("op_625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_626_cast_fp16 = softmax(axis = var_539, x = aw_39_cast_fp16)[name = tensor<string, []>("op_626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_627_cast_fp16 = softmax(axis = var_539, x = aw_41_cast_fp16)[name = tensor<string, []>("op_627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_628_cast_fp16 = softmax(axis = var_539, x = aw_43_cast_fp16)[name = tensor<string, []>("op_628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_629_cast_fp16 = softmax(axis = var_539, x = aw_45_cast_fp16)[name = tensor<string, []>("op_629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_630_cast_fp16 = softmax(axis = var_539, x = aw_cast_fp16)[name = tensor<string, []>("op_630_cast_fp16")];
+            tensor<string, []> var_632_equation_0 = const()[name = tensor<string, []>("op_632_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_632_cast_fp16 = einsum(equation = var_632_equation_0, values = (var_606_cast_fp16_0, var_625_cast_fp16))[name = tensor<string, []>("op_632_cast_fp16")];
+            tensor<string, []> var_634_equation_0 = const()[name = tensor<string, []>("op_634_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_606_cast_fp16_1, var_626_cast_fp16))[name = tensor<string, []>("op_634_cast_fp16")];
+            tensor<string, []> var_636_equation_0 = const()[name = tensor<string, []>("op_636_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_636_cast_fp16 = einsum(equation = var_636_equation_0, values = (var_606_cast_fp16_2, var_627_cast_fp16))[name = tensor<string, []>("op_636_cast_fp16")];
+            tensor<string, []> var_638_equation_0 = const()[name = tensor<string, []>("op_638_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_606_cast_fp16_3, var_628_cast_fp16))[name = tensor<string, []>("op_638_cast_fp16")];
+            tensor<string, []> var_640_equation_0 = const()[name = tensor<string, []>("op_640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_640_cast_fp16 = einsum(equation = var_640_equation_0, values = (var_606_cast_fp16_4, var_629_cast_fp16))[name = tensor<string, []>("op_640_cast_fp16")];
+            tensor<string, []> var_642_equation_0 = const()[name = tensor<string, []>("op_642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_606_cast_fp16_5, var_630_cast_fp16))[name = tensor<string, []>("op_642_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_35_cast_fp16 = concat(axis = var_539, interleave = input_35_interleave_0, values = (var_632_cast_fp16, var_634_cast_fp16, var_636_cast_fp16, var_638_cast_fp16, var_640_cast_fp16, var_642_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_651_pad_type_0 = const()[name = tensor<string, []>("op_651_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_651_strides_0 = const()[name = tensor<string, []>("op_651_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_651_pad_0 = const()[name = tensor<string, []>("op_651_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_651_dilations_0 = const()[name = tensor<string, []>("op_651_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_651_groups_0 = const()[name = tensor<string, []>("op_651_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13758592)))];
+            tensor<fp16, [384]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14053568)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_651_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_651_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_651_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14054400)))];
+            tensor<fp16, [384]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14055232)))];
+            tensor<fp16, []> var_661_to_fp16 = const()[name = tensor<string, []>("op_661_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_661_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056064)))];
+            tensor<fp16, [1536]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15235776)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_687_pad_type_0 = const()[name = tensor<string, []>("op_687_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_687_strides_0 = const()[name = tensor<string, []>("op_687_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_687_pad_0 = const()[name = tensor<string, []>("op_687_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_687_dilations_0 = const()[name = tensor<string, []>("op_687_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_687_groups_0 = const()[name = tensor<string, []>("op_687_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15238912)))];
+            tensor<fp16, [384]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16418624)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_687_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_687_dilations_0, groups = var_687_groups_0, pad = var_687_pad_0, pad_type = var_687_pad_type_0, strides = var_687_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_687_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_687_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16419456)))];
+            tensor<fp16, [384]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16420288)))];
+            tensor<fp16, []> var_701_to_fp16 = const()[name = tensor<string, []>("op_701_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_701_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_712_axes_0 = const()[name = tensor<string, []>("op_712_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1500]> var_712_cast_fp16 = squeeze(axes = var_712_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_712_cast_fp16")];
+            tensor<int32, [3]> var_715_perm_0 = const()[name = tensor<string, []>("op_715_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_715_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_715_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 384]> var_715_cast_fp16 = transpose(perm = var_715_perm_0, x = var_712_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 384]> output = cast(dtype = var_715_cast_fp16_to_fp32_dtype_0, x = var_715_cast_fp16)[name = tensor<string, []>("cast_19")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/tiny.en/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin b/tiny.en/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d89d6272f407b442a55ccec48b193d3414930b6b
--- /dev/null
+++ b/tiny.en/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:040cc1dc03624b30f9f01e567d71b651729da26d98de36c72ab3266c85f68fab
+size 16421120
diff --git a/tiny.en/ggml-tiny.en.bin b/tiny.en/ggml-tiny.en.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17ad750438d1d42162fe06ab4b21aef2389d2137
--- /dev/null
+++ b/tiny.en/ggml-tiny.en.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:921e4cf8686fdd993dcd081a5da5b6c365bfde1162e72b08d75ac75289920b1f
+size 77704715
diff --git a/tiny/.DS_Store b/tiny/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..776b14f7da0c1e89472bb0067a6c0955b0fae242
Binary files /dev/null and b/tiny/.DS_Store differ
diff --git a/tiny/ggml-tiny-encoder.mlmodelc/analytics/coremldata.bin b/tiny/ggml-tiny-encoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fb65fe66c88d9f34cbfc6d72c42eee5a375a471f
--- /dev/null
+++ b/tiny/ggml-tiny-encoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c811ab131cbe7fef6230b32b61cb04cc99fb5990e5fb70ab5d7ec907a4a124b2
+size 243
diff --git a/tiny/ggml-tiny-encoder.mlmodelc/coremldata.bin b/tiny/ggml-tiny-encoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..94d88431c84a536676db3d979d8407f9b298e689
--- /dev/null
+++ b/tiny/ggml-tiny-encoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9df7e387da981d548738ae3570db8a375e2e55d16ae8e1374da84809fdd0c5a
+size 320
diff --git a/tiny/ggml-tiny-encoder.mlmodelc/metadata.json b/tiny/ggml-tiny-encoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..c7f4d8063a97e36f6929a5717606155f752b1fcc
--- /dev/null
+++ b/tiny/ggml-tiny-encoder.mlmodelc/metadata.json
@@ -0,0 +1,71 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1500 × 384)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500, 384]",
+        "name" : "output",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Concat" : 4,
+      "Gelu" : 6,
+      "LayerNorm" : 9,
+      "Transpose" : 5,
+      "Softmax" : 24,
+      "Squeeze" : 1,
+      "Cast" : 2,
+      "Add" : 9,
+      "Einsum" : 48,
+      "ExpandDims" : 1,
+      "Split" : 12,
+      "Conv" : 26
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.3.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.2"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 3000]",
+        "name" : "logmel_data",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "coreml_encoder_tiny",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/tiny/ggml-tiny-encoder.mlmodelc/model.mil b/tiny/ggml-tiny-encoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..705ff46e2edf3b81a1e15e29babaefdf0ed49ab9
--- /dev/null
+++ b/tiny/ggml-tiny-encoder.mlmodelc/model.mil
@@ -0,0 +1,463 @@
+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
+{
+    func main<ios15>(tensor<fp32, [1, 80, 3000]> logmel_data) {
+            tensor<string, []> var_28_pad_type_0 = const()[name = tensor<string, []>("op_28_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_28_pad_0 = const()[name = tensor<string, []>("op_28_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_28_strides_0 = const()[name = tensor<string, []>("op_28_strides_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1]> var_28_dilations_0 = const()[name = tensor<string, []>("op_28_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_28_groups_0 = const()[name = tensor<string, []>("op_28_groups_0"), val = tensor<int32, []>(1)];
+            tensor<string, []> logmel_data_to_fp16_dtype_0 = const()[name = tensor<string, []>("logmel_data_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [384, 80, 3]> const_0_to_fp16 = const()[name = tensor<string, []>("const_0_to_fp16"), val = tensor<fp16, [384, 80, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [384]> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(184448)))];
+            tensor<fp16, [1, 80, 3000]> logmel_data_to_fp16 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data)[name = tensor<string, []>("cast_20")];
+            tensor<fp16, [1, 384, 3000]> var_28_cast_fp16 = conv(bias = const_1_to_fp16, dilations = var_28_dilations_0, groups = var_28_groups_0, pad = var_28_pad_0, pad_type = var_28_pad_type_0, strides = var_28_strides_0, weight = const_0_to_fp16, x = logmel_data_to_fp16)[name = tensor<string, []>("op_28_cast_fp16")];
+            tensor<string, []> input_1_mode_0 = const()[name = tensor<string, []>("input_1_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 3000]> input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_28_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
+            tensor<string, []> var_46_pad_type_0 = const()[name = tensor<string, []>("op_46_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [2]> var_46_pad_0 = const()[name = tensor<string, []>("op_46_pad_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [1]> var_46_strides_0 = const()[name = tensor<string, []>("op_46_strides_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1]> var_46_dilations_0 = const()[name = tensor<string, []>("op_46_dilations_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, []> var_46_groups_0 = const()[name = tensor<string, []>("op_46_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 3]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [384, 384, 3]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185280)))];
+            tensor<fp16, [384]> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070080)))];
+            tensor<fp16, [1, 384, 1500]> var_46_cast_fp16 = conv(bias = const_3_to_fp16, dilations = var_46_dilations_0, groups = var_46_groups_0, pad = var_46_pad_0, pad_type = var_46_pad_type_0, strides = var_46_strides_0, weight = const_2_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("op_46_cast_fp16")];
+            tensor<string, []> x_3_mode_0 = const()[name = tensor<string, []>("x_3_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 384, 1500]> x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_46_cast_fp16)[name = tensor<string, []>("x_3_cast_fp16")];
+            tensor<fp16, [384, 1500]> var_51_to_fp16 = const()[name = tensor<string, []>("op_51_to_fp16"), val = tensor<fp16, [384, 1500]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1070912)))];
+            tensor<fp16, [1, 384, 1500]> var_53_cast_fp16 = add(x = x_3_cast_fp16, y = var_51_to_fp16)[name = tensor<string, []>("op_53_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = tensor<string, []>("inputs_1_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_53_cast_fp16)[name = tensor<string, []>("inputs_1_cast_fp16")];
+            tensor<int32, []> var_68 = const()[name = tensor<string, []>("op_68"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_3_axes_0 = const()[name = tensor<string, []>("input_3_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_3_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_3_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2222976)))];
+            tensor<fp16, [384]> input_3_beta_0_to_fp16 = const()[name = tensor<string, []>("input_3_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2223808)))];
+            tensor<fp16, []> var_84_to_fp16 = const()[name = tensor<string, []>("op_84_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_3_cast_fp16 = layer_norm(axes = input_3_axes_0, beta = input_3_beta_0_to_fp16, epsilon = var_84_to_fp16, gamma = input_3_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<string, []> q_1_pad_type_0 = const()[name = tensor<string, []>("q_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_1_strides_0 = const()[name = tensor<string, []>("q_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_1_pad_0 = const()[name = tensor<string, []>("q_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_1_dilations_0 = const()[name = tensor<string, []>("q_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_1_groups_0 = const()[name = tensor<string, []>("q_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_119_weight_0_to_fp16 = const()[name = tensor<string, []>("op_119_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2224640)))];
+            tensor<fp16, [384]> var_119_bias_0_to_fp16 = const()[name = tensor<string, []>("op_119_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2519616)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_119_cast_fp16 = conv(bias = var_119_bias_0_to_fp16, dilations = q_1_dilations_0, groups = q_1_groups_0, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = q_1_strides_0, weight = var_119_weight_0_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_119_cast_fp16")];
+            tensor<string, []> k_1_pad_type_0 = const()[name = tensor<string, []>("k_1_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_1_strides_0 = const()[name = tensor<string, []>("k_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_1_pad_0 = const()[name = tensor<string, []>("k_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_1_dilations_0 = const()[name = tensor<string, []>("k_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_1_groups_0 = const()[name = tensor<string, []>("k_1_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2520448)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_1_cast_fp16 = conv(dilations = k_1_dilations_0, groups = k_1_groups_0, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = k_1_strides_0, weight = blocks_0_attn_key_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")];
+            tensor<string, []> var_117_pad_type_0 = const()[name = tensor<string, []>("op_117_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_117_strides_0 = const()[name = tensor<string, []>("op_117_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_117_pad_0 = const()[name = tensor<string, []>("op_117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_117_dilations_0 = const()[name = tensor<string, []>("op_117_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_117_groups_0 = const()[name = tensor<string, []>("op_117_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2815424)))];
+            tensor<fp16, [384]> blocks_0_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3110400)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_117_cast_fp16 = conv(bias = blocks_0_attn_value_bias_to_fp16, dilations = var_117_dilations_0, groups = var_117_groups_0, pad = var_117_pad_0, pad_type = var_117_pad_type_0, strides = var_117_strides_0, weight = blocks_0_attn_value_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("op_117_cast_fp16")];
+            tensor<int32, [6]> tile_0 = const()[name = tensor<string, []>("tile_0"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_120_axis_0 = const()[name = tensor<string, []>("op_120_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_120_cast_fp16_5 = split(axis = var_120_axis_0, split_sizes = tile_0, x = var_119_cast_fp16)[name = tensor<string, []>("op_120_cast_fp16")];
+            tensor<int32, [4]> var_127_perm_0 = const()[name = tensor<string, []>("op_127_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_1 = const()[name = tensor<string, []>("tile_1"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_128_axis_0 = const()[name = tensor<string, []>("op_128_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_127_cast_fp16 = transpose(perm = var_127_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_128_cast_fp16_5 = split(axis = var_128_axis_0, split_sizes = tile_1, x = var_127_cast_fp16)[name = tensor<string, []>("op_128_cast_fp16")];
+            tensor<int32, [6]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_135_axis_0 = const()[name = tensor<string, []>("op_135_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_135_cast_fp16_5 = split(axis = var_135_axis_0, split_sizes = tile_2, x = var_117_cast_fp16)[name = tensor<string, []>("op_135_cast_fp16")];
+            tensor<string, []> aw_1_equation_0 = const()[name = tensor<string, []>("aw_1_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_1_cast_fp16 = einsum(equation = aw_1_equation_0, values = (var_128_cast_fp16_0, var_120_cast_fp16_0))[name = tensor<string, []>("aw_1_cast_fp16")];
+            tensor<string, []> aw_3_equation_0 = const()[name = tensor<string, []>("aw_3_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_3_cast_fp16 = einsum(equation = aw_3_equation_0, values = (var_128_cast_fp16_1, var_120_cast_fp16_1))[name = tensor<string, []>("aw_3_cast_fp16")];
+            tensor<string, []> aw_5_equation_0 = const()[name = tensor<string, []>("aw_5_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_5_cast_fp16 = einsum(equation = aw_5_equation_0, values = (var_128_cast_fp16_2, var_120_cast_fp16_2))[name = tensor<string, []>("aw_5_cast_fp16")];
+            tensor<string, []> aw_7_equation_0 = const()[name = tensor<string, []>("aw_7_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_7_cast_fp16 = einsum(equation = aw_7_equation_0, values = (var_128_cast_fp16_3, var_120_cast_fp16_3))[name = tensor<string, []>("aw_7_cast_fp16")];
+            tensor<string, []> aw_9_equation_0 = const()[name = tensor<string, []>("aw_9_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_9_cast_fp16 = einsum(equation = aw_9_equation_0, values = (var_128_cast_fp16_4, var_120_cast_fp16_4))[name = tensor<string, []>("aw_9_cast_fp16")];
+            tensor<string, []> aw_11_equation_0 = const()[name = tensor<string, []>("aw_11_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_11_cast_fp16 = einsum(equation = aw_11_equation_0, values = (var_128_cast_fp16_5, var_120_cast_fp16_5))[name = tensor<string, []>("aw_11_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_154_cast_fp16 = softmax(axis = var_68, x = aw_1_cast_fp16)[name = tensor<string, []>("op_154_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_155_cast_fp16 = softmax(axis = var_68, x = aw_3_cast_fp16)[name = tensor<string, []>("op_155_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_156_cast_fp16 = softmax(axis = var_68, x = aw_5_cast_fp16)[name = tensor<string, []>("op_156_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_157_cast_fp16 = softmax(axis = var_68, x = aw_7_cast_fp16)[name = tensor<string, []>("op_157_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_158_cast_fp16 = softmax(axis = var_68, x = aw_9_cast_fp16)[name = tensor<string, []>("op_158_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_159_cast_fp16 = softmax(axis = var_68, x = aw_11_cast_fp16)[name = tensor<string, []>("op_159_cast_fp16")];
+            tensor<string, []> var_161_equation_0 = const()[name = tensor<string, []>("op_161_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_161_cast_fp16 = einsum(equation = var_161_equation_0, values = (var_135_cast_fp16_0, var_154_cast_fp16))[name = tensor<string, []>("op_161_cast_fp16")];
+            tensor<string, []> var_163_equation_0 = const()[name = tensor<string, []>("op_163_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_163_cast_fp16 = einsum(equation = var_163_equation_0, values = (var_135_cast_fp16_1, var_155_cast_fp16))[name = tensor<string, []>("op_163_cast_fp16")];
+            tensor<string, []> var_165_equation_0 = const()[name = tensor<string, []>("op_165_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_165_cast_fp16 = einsum(equation = var_165_equation_0, values = (var_135_cast_fp16_2, var_156_cast_fp16))[name = tensor<string, []>("op_165_cast_fp16")];
+            tensor<string, []> var_167_equation_0 = const()[name = tensor<string, []>("op_167_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_167_cast_fp16 = einsum(equation = var_167_equation_0, values = (var_135_cast_fp16_3, var_157_cast_fp16))[name = tensor<string, []>("op_167_cast_fp16")];
+            tensor<string, []> var_169_equation_0 = const()[name = tensor<string, []>("op_169_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_169_cast_fp16 = einsum(equation = var_169_equation_0, values = (var_135_cast_fp16_4, var_158_cast_fp16))[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<string, []> var_171_equation_0 = const()[name = tensor<string, []>("op_171_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_171_cast_fp16 = einsum(equation = var_171_equation_0, values = (var_135_cast_fp16_5, var_159_cast_fp16))[name = tensor<string, []>("op_171_cast_fp16")];
+            tensor<bool, []> input_5_interleave_0 = const()[name = tensor<string, []>("input_5_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_5_cast_fp16 = concat(axis = var_68, interleave = input_5_interleave_0, values = (var_161_cast_fp16, var_163_cast_fp16, var_165_cast_fp16, var_167_cast_fp16, var_169_cast_fp16, var_171_cast_fp16))[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<string, []> var_180_pad_type_0 = const()[name = tensor<string, []>("op_180_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_180_strides_0 = const()[name = tensor<string, []>("op_180_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_180_pad_0 = const()[name = tensor<string, []>("op_180_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_180_dilations_0 = const()[name = tensor<string, []>("op_180_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_180_groups_0 = const()[name = tensor<string, []>("op_180_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_0_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3111232)))];
+            tensor<fp16, [384]> blocks_0_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3406208)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_180_cast_fp16 = conv(bias = blocks_0_attn_out_bias_to_fp16, dilations = var_180_dilations_0, groups = var_180_groups_0, pad = var_180_pad_0, pad_type = var_180_pad_type_0, strides = var_180_strides_0, weight = blocks_0_attn_out_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("op_180_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = var_180_cast_fp16)[name = tensor<string, []>("inputs_3_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = tensor<string, []>("input_7_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_7_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_7_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407040)))];
+            tensor<fp16, [384]> input_7_beta_0_to_fp16 = const()[name = tensor<string, []>("input_7_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3407872)))];
+            tensor<fp16, []> var_190_to_fp16 = const()[name = tensor<string, []>("op_190_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = input_7_beta_0_to_fp16, epsilon = var_190_to_fp16, gamma = input_7_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<string, []> input_9_pad_type_0 = const()[name = tensor<string, []>("input_9_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_9_strides_0 = const()[name = tensor<string, []>("input_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_9_pad_0 = const()[name = tensor<string, []>("input_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_9_dilations_0 = const()[name = tensor<string, []>("input_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_9_groups_0 = const()[name = tensor<string, []>("input_9_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_0_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3408704)))];
+            tensor<fp16, [1536]> blocks_0_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4588416)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_9_cast_fp16 = conv(bias = blocks_0_mlp_0_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = blocks_0_mlp_0_weight_to_fp16, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<string, []> input_11_mode_0 = const()[name = tensor<string, []>("input_11_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_11_cast_fp16 = gelu(mode = input_11_mode_0, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<string, []> var_216_pad_type_0 = const()[name = tensor<string, []>("op_216_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_216_strides_0 = const()[name = tensor<string, []>("op_216_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_216_pad_0 = const()[name = tensor<string, []>("op_216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_216_dilations_0 = const()[name = tensor<string, []>("op_216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_216_groups_0 = const()[name = tensor<string, []>("op_216_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_0_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4591552)))];
+            tensor<fp16, [384]> blocks_0_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_0_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5771264)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_216_cast_fp16 = conv(bias = blocks_0_mlp_2_bias_to_fp16, dilations = var_216_dilations_0, groups = var_216_groups_0, pad = var_216_pad_0, pad_type = var_216_pad_type_0, strides = var_216_strides_0, weight = blocks_0_mlp_2_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("op_216_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = var_216_cast_fp16)[name = tensor<string, []>("inputs_5_cast_fp16")];
+            tensor<int32, []> var_225 = const()[name = tensor<string, []>("op_225"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = tensor<string, []>("input_13_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_13_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772096)))];
+            tensor<fp16, [384]> input_13_beta_0_to_fp16 = const()[name = tensor<string, []>("input_13_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5772928)))];
+            tensor<fp16, []> var_241_to_fp16 = const()[name = tensor<string, []>("op_241_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = input_13_beta_0_to_fp16, epsilon = var_241_to_fp16, gamma = input_13_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<string, []> q_3_pad_type_0 = const()[name = tensor<string, []>("q_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_3_strides_0 = const()[name = tensor<string, []>("q_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_3_pad_0 = const()[name = tensor<string, []>("q_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_3_dilations_0 = const()[name = tensor<string, []>("q_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_3_groups_0 = const()[name = tensor<string, []>("q_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_276_weight_0_to_fp16 = const()[name = tensor<string, []>("op_276_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5773760)))];
+            tensor<fp16, [384]> var_276_bias_0_to_fp16 = const()[name = tensor<string, []>("op_276_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6068736)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_276_cast_fp16 = conv(bias = var_276_bias_0_to_fp16, dilations = q_3_dilations_0, groups = q_3_groups_0, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = q_3_strides_0, weight = var_276_weight_0_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_276_cast_fp16")];
+            tensor<string, []> k_3_pad_type_0 = const()[name = tensor<string, []>("k_3_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_3_strides_0 = const()[name = tensor<string, []>("k_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_3_pad_0 = const()[name = tensor<string, []>("k_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_3_dilations_0 = const()[name = tensor<string, []>("k_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_3_groups_0 = const()[name = tensor<string, []>("k_3_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6069568)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_3_cast_fp16 = conv(dilations = k_3_dilations_0, groups = k_3_groups_0, pad = k_3_pad_0, pad_type = k_3_pad_type_0, strides = k_3_strides_0, weight = blocks_1_attn_key_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")];
+            tensor<string, []> var_274_pad_type_0 = const()[name = tensor<string, []>("op_274_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_274_strides_0 = const()[name = tensor<string, []>("op_274_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_274_pad_0 = const()[name = tensor<string, []>("op_274_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_274_dilations_0 = const()[name = tensor<string, []>("op_274_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_274_groups_0 = const()[name = tensor<string, []>("op_274_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6364544)))];
+            tensor<fp16, [384]> blocks_1_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6659520)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_274_cast_fp16 = conv(bias = blocks_1_attn_value_bias_to_fp16, dilations = var_274_dilations_0, groups = var_274_groups_0, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_274_strides_0, weight = blocks_1_attn_value_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("op_274_cast_fp16")];
+            tensor<int32, [6]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_277_axis_0 = const()[name = tensor<string, []>("op_277_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_277_cast_fp16_5 = split(axis = var_277_axis_0, split_sizes = tile_3, x = var_276_cast_fp16)[name = tensor<string, []>("op_277_cast_fp16")];
+            tensor<int32, [4]> var_284_perm_0 = const()[name = tensor<string, []>("op_284_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_285_axis_0 = const()[name = tensor<string, []>("op_285_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_284_cast_fp16 = transpose(perm = var_284_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_285_cast_fp16_5 = split(axis = var_285_axis_0, split_sizes = tile_4, x = var_284_cast_fp16)[name = tensor<string, []>("op_285_cast_fp16")];
+            tensor<int32, [6]> tile_5 = const()[name = tensor<string, []>("tile_5"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_292_axis_0 = const()[name = tensor<string, []>("op_292_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_292_cast_fp16_5 = split(axis = var_292_axis_0, split_sizes = tile_5, x = var_274_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<string, []> aw_13_equation_0 = const()[name = tensor<string, []>("aw_13_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_13_cast_fp16 = einsum(equation = aw_13_equation_0, values = (var_285_cast_fp16_0, var_277_cast_fp16_0))[name = tensor<string, []>("aw_13_cast_fp16")];
+            tensor<string, []> aw_15_equation_0 = const()[name = tensor<string, []>("aw_15_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_15_cast_fp16 = einsum(equation = aw_15_equation_0, values = (var_285_cast_fp16_1, var_277_cast_fp16_1))[name = tensor<string, []>("aw_15_cast_fp16")];
+            tensor<string, []> aw_17_equation_0 = const()[name = tensor<string, []>("aw_17_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_17_cast_fp16 = einsum(equation = aw_17_equation_0, values = (var_285_cast_fp16_2, var_277_cast_fp16_2))[name = tensor<string, []>("aw_17_cast_fp16")];
+            tensor<string, []> aw_19_equation_0 = const()[name = tensor<string, []>("aw_19_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_19_cast_fp16 = einsum(equation = aw_19_equation_0, values = (var_285_cast_fp16_3, var_277_cast_fp16_3))[name = tensor<string, []>("aw_19_cast_fp16")];
+            tensor<string, []> aw_21_equation_0 = const()[name = tensor<string, []>("aw_21_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_21_cast_fp16 = einsum(equation = aw_21_equation_0, values = (var_285_cast_fp16_4, var_277_cast_fp16_4))[name = tensor<string, []>("aw_21_cast_fp16")];
+            tensor<string, []> aw_23_equation_0 = const()[name = tensor<string, []>("aw_23_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_23_cast_fp16 = einsum(equation = aw_23_equation_0, values = (var_285_cast_fp16_5, var_277_cast_fp16_5))[name = tensor<string, []>("aw_23_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_311_cast_fp16 = softmax(axis = var_225, x = aw_13_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_312_cast_fp16 = softmax(axis = var_225, x = aw_15_cast_fp16)[name = tensor<string, []>("op_312_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_313_cast_fp16 = softmax(axis = var_225, x = aw_17_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_314_cast_fp16 = softmax(axis = var_225, x = aw_19_cast_fp16)[name = tensor<string, []>("op_314_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_315_cast_fp16 = softmax(axis = var_225, x = aw_21_cast_fp16)[name = tensor<string, []>("op_315_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_316_cast_fp16 = softmax(axis = var_225, x = aw_23_cast_fp16)[name = tensor<string, []>("op_316_cast_fp16")];
+            tensor<string, []> var_318_equation_0 = const()[name = tensor<string, []>("op_318_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_318_cast_fp16 = einsum(equation = var_318_equation_0, values = (var_292_cast_fp16_0, var_311_cast_fp16))[name = tensor<string, []>("op_318_cast_fp16")];
+            tensor<string, []> var_320_equation_0 = const()[name = tensor<string, []>("op_320_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_320_cast_fp16 = einsum(equation = var_320_equation_0, values = (var_292_cast_fp16_1, var_312_cast_fp16))[name = tensor<string, []>("op_320_cast_fp16")];
+            tensor<string, []> var_322_equation_0 = const()[name = tensor<string, []>("op_322_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_322_cast_fp16 = einsum(equation = var_322_equation_0, values = (var_292_cast_fp16_2, var_313_cast_fp16))[name = tensor<string, []>("op_322_cast_fp16")];
+            tensor<string, []> var_324_equation_0 = const()[name = tensor<string, []>("op_324_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_324_cast_fp16 = einsum(equation = var_324_equation_0, values = (var_292_cast_fp16_3, var_314_cast_fp16))[name = tensor<string, []>("op_324_cast_fp16")];
+            tensor<string, []> var_326_equation_0 = const()[name = tensor<string, []>("op_326_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_326_cast_fp16 = einsum(equation = var_326_equation_0, values = (var_292_cast_fp16_4, var_315_cast_fp16))[name = tensor<string, []>("op_326_cast_fp16")];
+            tensor<string, []> var_328_equation_0 = const()[name = tensor<string, []>("op_328_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_328_cast_fp16 = einsum(equation = var_328_equation_0, values = (var_292_cast_fp16_5, var_316_cast_fp16))[name = tensor<string, []>("op_328_cast_fp16")];
+            tensor<bool, []> input_15_interleave_0 = const()[name = tensor<string, []>("input_15_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_15_cast_fp16 = concat(axis = var_225, interleave = input_15_interleave_0, values = (var_318_cast_fp16, var_320_cast_fp16, var_322_cast_fp16, var_324_cast_fp16, var_326_cast_fp16, var_328_cast_fp16))[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<string, []> var_337_pad_type_0 = const()[name = tensor<string, []>("op_337_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_337_strides_0 = const()[name = tensor<string, []>("op_337_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_337_pad_0 = const()[name = tensor<string, []>("op_337_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_337_dilations_0 = const()[name = tensor<string, []>("op_337_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_337_groups_0 = const()[name = tensor<string, []>("op_337_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_1_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6660352)))];
+            tensor<fp16, [384]> blocks_1_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6955328)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_337_cast_fp16 = conv(bias = blocks_1_attn_out_bias_to_fp16, dilations = var_337_dilations_0, groups = var_337_groups_0, pad = var_337_pad_0, pad_type = var_337_pad_type_0, strides = var_337_strides_0, weight = blocks_1_attn_out_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("op_337_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = var_337_cast_fp16)[name = tensor<string, []>("inputs_7_cast_fp16")];
+            tensor<int32, [1]> input_17_axes_0 = const()[name = tensor<string, []>("input_17_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_17_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_17_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956160)))];
+            tensor<fp16, [384]> input_17_beta_0_to_fp16 = const()[name = tensor<string, []>("input_17_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6956992)))];
+            tensor<fp16, []> var_347_to_fp16 = const()[name = tensor<string, []>("op_347_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_17_cast_fp16 = layer_norm(axes = input_17_axes_0, beta = input_17_beta_0_to_fp16, epsilon = var_347_to_fp16, gamma = input_17_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<string, []> input_19_pad_type_0 = const()[name = tensor<string, []>("input_19_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_19_strides_0 = const()[name = tensor<string, []>("input_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_19_pad_0 = const()[name = tensor<string, []>("input_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_19_dilations_0 = const()[name = tensor<string, []>("input_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_19_groups_0 = const()[name = tensor<string, []>("input_19_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_1_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6957824)))];
+            tensor<fp16, [1536]> blocks_1_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8137536)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_19_cast_fp16 = conv(bias = blocks_1_mlp_0_bias_to_fp16, dilations = input_19_dilations_0, groups = input_19_groups_0, pad = input_19_pad_0, pad_type = input_19_pad_type_0, strides = input_19_strides_0, weight = blocks_1_mlp_0_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<string, []> input_21_mode_0 = const()[name = tensor<string, []>("input_21_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_21_cast_fp16 = gelu(mode = input_21_mode_0, x = input_19_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<string, []> var_373_pad_type_0 = const()[name = tensor<string, []>("op_373_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = tensor<string, []>("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = tensor<string, []>("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = tensor<string, []>("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_373_groups_0 = const()[name = tensor<string, []>("op_373_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_1_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8140672)))];
+            tensor<fp16, [384]> blocks_1_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_1_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9320384)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_373_cast_fp16 = conv(bias = blocks_1_mlp_2_bias_to_fp16, dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = blocks_1_mlp_2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor<string, []>("op_373_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = var_373_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
+            tensor<int32, []> var_382 = const()[name = tensor<string, []>("op_382"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_23_axes_0 = const()[name = tensor<string, []>("input_23_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_23_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9321216)))];
+            tensor<fp16, [384]> input_23_beta_0_to_fp16 = const()[name = tensor<string, []>("input_23_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322048)))];
+            tensor<fp16, []> var_398_to_fp16 = const()[name = tensor<string, []>("op_398_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_23_cast_fp16 = layer_norm(axes = input_23_axes_0, beta = input_23_beta_0_to_fp16, epsilon = var_398_to_fp16, gamma = input_23_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<string, []> q_5_pad_type_0 = const()[name = tensor<string, []>("q_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_5_strides_0 = const()[name = tensor<string, []>("q_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_5_pad_0 = const()[name = tensor<string, []>("q_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_5_dilations_0 = const()[name = tensor<string, []>("q_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_5_groups_0 = const()[name = tensor<string, []>("q_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_433_weight_0_to_fp16 = const()[name = tensor<string, []>("op_433_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9322880)))];
+            tensor<fp16, [384]> var_433_bias_0_to_fp16 = const()[name = tensor<string, []>("op_433_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9617856)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_433_cast_fp16 = conv(bias = var_433_bias_0_to_fp16, dilations = q_5_dilations_0, groups = q_5_groups_0, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = q_5_strides_0, weight = var_433_weight_0_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_433_cast_fp16")];
+            tensor<string, []> k_5_pad_type_0 = const()[name = tensor<string, []>("k_5_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_5_strides_0 = const()[name = tensor<string, []>("k_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_5_pad_0 = const()[name = tensor<string, []>("k_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_5_dilations_0 = const()[name = tensor<string, []>("k_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_5_groups_0 = const()[name = tensor<string, []>("k_5_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9618688)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_5_cast_fp16 = conv(dilations = k_5_dilations_0, groups = k_5_groups_0, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = k_5_strides_0, weight = blocks_2_attn_key_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")];
+            tensor<string, []> var_431_pad_type_0 = const()[name = tensor<string, []>("op_431_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_431_strides_0 = const()[name = tensor<string, []>("op_431_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_431_pad_0 = const()[name = tensor<string, []>("op_431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_431_dilations_0 = const()[name = tensor<string, []>("op_431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_431_groups_0 = const()[name = tensor<string, []>("op_431_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9913664)))];
+            tensor<fp16, [384]> blocks_2_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10208640)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_431_cast_fp16 = conv(bias = blocks_2_attn_value_bias_to_fp16, dilations = var_431_dilations_0, groups = var_431_groups_0, pad = var_431_pad_0, pad_type = var_431_pad_type_0, strides = var_431_strides_0, weight = blocks_2_attn_value_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("op_431_cast_fp16")];
+            tensor<int32, [6]> tile_6 = const()[name = tensor<string, []>("tile_6"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_434_axis_0 = const()[name = tensor<string, []>("op_434_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_434_cast_fp16_5 = split(axis = var_434_axis_0, split_sizes = tile_6, x = var_433_cast_fp16)[name = tensor<string, []>("op_434_cast_fp16")];
+            tensor<int32, [4]> var_441_perm_0 = const()[name = tensor<string, []>("op_441_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_442_axis_0 = const()[name = tensor<string, []>("op_442_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_441_cast_fp16 = transpose(perm = var_441_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_442_cast_fp16_5 = split(axis = var_442_axis_0, split_sizes = tile_7, x = var_441_cast_fp16)[name = tensor<string, []>("op_442_cast_fp16")];
+            tensor<int32, [6]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_449_axis_0 = const()[name = tensor<string, []>("op_449_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_449_cast_fp16_5 = split(axis = var_449_axis_0, split_sizes = tile_8, x = var_431_cast_fp16)[name = tensor<string, []>("op_449_cast_fp16")];
+            tensor<string, []> aw_25_equation_0 = const()[name = tensor<string, []>("aw_25_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_25_cast_fp16 = einsum(equation = aw_25_equation_0, values = (var_442_cast_fp16_0, var_434_cast_fp16_0))[name = tensor<string, []>("aw_25_cast_fp16")];
+            tensor<string, []> aw_27_equation_0 = const()[name = tensor<string, []>("aw_27_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_27_cast_fp16 = einsum(equation = aw_27_equation_0, values = (var_442_cast_fp16_1, var_434_cast_fp16_1))[name = tensor<string, []>("aw_27_cast_fp16")];
+            tensor<string, []> aw_29_equation_0 = const()[name = tensor<string, []>("aw_29_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_29_cast_fp16 = einsum(equation = aw_29_equation_0, values = (var_442_cast_fp16_2, var_434_cast_fp16_2))[name = tensor<string, []>("aw_29_cast_fp16")];
+            tensor<string, []> aw_31_equation_0 = const()[name = tensor<string, []>("aw_31_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_31_cast_fp16 = einsum(equation = aw_31_equation_0, values = (var_442_cast_fp16_3, var_434_cast_fp16_3))[name = tensor<string, []>("aw_31_cast_fp16")];
+            tensor<string, []> aw_33_equation_0 = const()[name = tensor<string, []>("aw_33_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_33_cast_fp16 = einsum(equation = aw_33_equation_0, values = (var_442_cast_fp16_4, var_434_cast_fp16_4))[name = tensor<string, []>("aw_33_cast_fp16")];
+            tensor<string, []> aw_35_equation_0 = const()[name = tensor<string, []>("aw_35_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_35_cast_fp16 = einsum(equation = aw_35_equation_0, values = (var_442_cast_fp16_5, var_434_cast_fp16_5))[name = tensor<string, []>("aw_35_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_468_cast_fp16 = softmax(axis = var_382, x = aw_25_cast_fp16)[name = tensor<string, []>("op_468_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_469_cast_fp16 = softmax(axis = var_382, x = aw_27_cast_fp16)[name = tensor<string, []>("op_469_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_470_cast_fp16 = softmax(axis = var_382, x = aw_29_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_471_cast_fp16 = softmax(axis = var_382, x = aw_31_cast_fp16)[name = tensor<string, []>("op_471_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_472_cast_fp16 = softmax(axis = var_382, x = aw_33_cast_fp16)[name = tensor<string, []>("op_472_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_473_cast_fp16 = softmax(axis = var_382, x = aw_35_cast_fp16)[name = tensor<string, []>("op_473_cast_fp16")];
+            tensor<string, []> var_475_equation_0 = const()[name = tensor<string, []>("op_475_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_475_cast_fp16 = einsum(equation = var_475_equation_0, values = (var_449_cast_fp16_0, var_468_cast_fp16))[name = tensor<string, []>("op_475_cast_fp16")];
+            tensor<string, []> var_477_equation_0 = const()[name = tensor<string, []>("op_477_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_477_cast_fp16 = einsum(equation = var_477_equation_0, values = (var_449_cast_fp16_1, var_469_cast_fp16))[name = tensor<string, []>("op_477_cast_fp16")];
+            tensor<string, []> var_479_equation_0 = const()[name = tensor<string, []>("op_479_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_449_cast_fp16_2, var_470_cast_fp16))[name = tensor<string, []>("op_479_cast_fp16")];
+            tensor<string, []> var_481_equation_0 = const()[name = tensor<string, []>("op_481_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_481_cast_fp16 = einsum(equation = var_481_equation_0, values = (var_449_cast_fp16_3, var_471_cast_fp16))[name = tensor<string, []>("op_481_cast_fp16")];
+            tensor<string, []> var_483_equation_0 = const()[name = tensor<string, []>("op_483_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_449_cast_fp16_4, var_472_cast_fp16))[name = tensor<string, []>("op_483_cast_fp16")];
+            tensor<string, []> var_485_equation_0 = const()[name = tensor<string, []>("op_485_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_485_cast_fp16 = einsum(equation = var_485_equation_0, values = (var_449_cast_fp16_5, var_473_cast_fp16))[name = tensor<string, []>("op_485_cast_fp16")];
+            tensor<bool, []> input_25_interleave_0 = const()[name = tensor<string, []>("input_25_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_25_cast_fp16 = concat(axis = var_382, interleave = input_25_interleave_0, values = (var_475_cast_fp16, var_477_cast_fp16, var_479_cast_fp16, var_481_cast_fp16, var_483_cast_fp16, var_485_cast_fp16))[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<string, []> var_494_pad_type_0 = const()[name = tensor<string, []>("op_494_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_494_strides_0 = const()[name = tensor<string, []>("op_494_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_494_pad_0 = const()[name = tensor<string, []>("op_494_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_494_dilations_0 = const()[name = tensor<string, []>("op_494_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_494_groups_0 = const()[name = tensor<string, []>("op_494_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_2_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10209472)))];
+            tensor<fp16, [384]> blocks_2_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10504448)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_494_cast_fp16 = conv(bias = blocks_2_attn_out_bias_to_fp16, dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = blocks_2_attn_out_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = var_494_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> input_27_axes_0 = const()[name = tensor<string, []>("input_27_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_27_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_27_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10505280)))];
+            tensor<fp16, [384]> input_27_beta_0_to_fp16 = const()[name = tensor<string, []>("input_27_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506112)))];
+            tensor<fp16, []> var_504_to_fp16 = const()[name = tensor<string, []>("op_504_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_27_cast_fp16 = layer_norm(axes = input_27_axes_0, beta = input_27_beta_0_to_fp16, epsilon = var_504_to_fp16, gamma = input_27_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<string, []> input_29_pad_type_0 = const()[name = tensor<string, []>("input_29_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_29_strides_0 = const()[name = tensor<string, []>("input_29_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_29_pad_0 = const()[name = tensor<string, []>("input_29_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_29_dilations_0 = const()[name = tensor<string, []>("input_29_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_29_groups_0 = const()[name = tensor<string, []>("input_29_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_2_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10506944)))];
+            tensor<fp16, [1536]> blocks_2_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11686656)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_29_cast_fp16 = conv(bias = blocks_2_mlp_0_bias_to_fp16, dilations = input_29_dilations_0, groups = input_29_groups_0, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = input_29_strides_0, weight = blocks_2_mlp_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<string, []> var_530_pad_type_0 = const()[name = tensor<string, []>("op_530_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_530_strides_0 = const()[name = tensor<string, []>("op_530_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_530_pad_0 = const()[name = tensor<string, []>("op_530_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_530_dilations_0 = const()[name = tensor<string, []>("op_530_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_530_groups_0 = const()[name = tensor<string, []>("op_530_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_2_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11689792)))];
+            tensor<fp16, [384]> blocks_2_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_2_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12869504)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_530_cast_fp16 = conv(bias = blocks_2_mlp_2_bias_to_fp16, dilations = var_530_dilations_0, groups = var_530_groups_0, pad = var_530_pad_0, pad_type = var_530_pad_type_0, strides = var_530_strides_0, weight = blocks_2_mlp_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = var_530_cast_fp16)[name = tensor<string, []>("inputs_13_cast_fp16")];
+            tensor<int32, []> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, []>(1)];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_33_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_33_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12870336)))];
+            tensor<fp16, [384]> input_33_beta_0_to_fp16 = const()[name = tensor<string, []>("input_33_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12871168)))];
+            tensor<fp16, []> var_555_to_fp16 = const()[name = tensor<string, []>("op_555_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = input_33_beta_0_to_fp16, epsilon = var_555_to_fp16, gamma = input_33_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<string, []> q_pad_type_0 = const()[name = tensor<string, []>("q_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> q_strides_0 = const()[name = tensor<string, []>("q_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> q_pad_0 = const()[name = tensor<string, []>("q_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> q_dilations_0 = const()[name = tensor<string, []>("q_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> q_groups_0 = const()[name = tensor<string, []>("q_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> var_590_weight_0_to_fp16 = const()[name = tensor<string, []>("op_590_weight_0_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12872000)))];
+            tensor<fp16, [384]> var_590_bias_0_to_fp16 = const()[name = tensor<string, []>("op_590_bias_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13166976)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_590_cast_fp16 = conv(bias = var_590_bias_0_to_fp16, dilations = q_dilations_0, groups = q_groups_0, pad = q_pad_0, pad_type = q_pad_type_0, strides = q_strides_0, weight = var_590_weight_0_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_590_cast_fp16")];
+            tensor<string, []> k_pad_type_0 = const()[name = tensor<string, []>("k_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> k_strides_0 = const()[name = tensor<string, []>("k_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> k_pad_0 = const()[name = tensor<string, []>("k_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> k_dilations_0 = const()[name = tensor<string, []>("k_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> k_groups_0 = const()[name = tensor<string, []>("k_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_key_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_key_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13167808)))];
+            tensor<fp16, [1, 384, 1, 1500]> k_cast_fp16 = conv(dilations = k_dilations_0, groups = k_groups_0, pad = k_pad_0, pad_type = k_pad_type_0, strides = k_strides_0, weight = blocks_3_attn_key_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("k_cast_fp16")];
+            tensor<string, []> var_588_pad_type_0 = const()[name = tensor<string, []>("op_588_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_588_strides_0 = const()[name = tensor<string, []>("op_588_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_588_pad_0 = const()[name = tensor<string, []>("op_588_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_588_dilations_0 = const()[name = tensor<string, []>("op_588_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_588_groups_0 = const()[name = tensor<string, []>("op_588_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_value_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13462784)))];
+            tensor<fp16, [384]> blocks_3_attn_value_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_value_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13757760)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_588_cast_fp16 = conv(bias = blocks_3_attn_value_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = blocks_3_attn_value_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("op_588_cast_fp16")];
+            tensor<int32, [6]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_591_axis_0 = const()[name = tensor<string, []>("op_591_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_591_cast_fp16_5 = split(axis = var_591_axis_0, split_sizes = tile_9, x = var_590_cast_fp16)[name = tensor<string, []>("op_591_cast_fp16")];
+            tensor<int32, [4]> var_598_perm_0 = const()[name = tensor<string, []>("op_598_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [6]> tile_10 = const()[name = tensor<string, []>("tile_10"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_599_axis_0 = const()[name = tensor<string, []>("op_599_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 1500, 1, 384]> var_598_cast_fp16 = transpose(perm = var_598_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_0, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_1, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_2, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_3, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_4, tensor<fp16, [1, 1500, 1, 64]> var_599_cast_fp16_5 = split(axis = var_599_axis_0, split_sizes = tile_10, x = var_598_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<int32, [6]> tile_11 = const()[name = tensor<string, []>("tile_11"), val = tensor<int32, [6]>([64, 64, 64, 64, 64, 64])];
+            tensor<int32, []> var_606_axis_0 = const()[name = tensor<string, []>("op_606_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_0, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_1, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_2, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_3, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_4, tensor<fp16, [1, 64, 1, 1500]> var_606_cast_fp16_5 = split(axis = var_606_axis_0, split_sizes = tile_11, x = var_588_cast_fp16)[name = tensor<string, []>("op_606_cast_fp16")];
+            tensor<string, []> aw_37_equation_0 = const()[name = tensor<string, []>("aw_37_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_37_cast_fp16 = einsum(equation = aw_37_equation_0, values = (var_599_cast_fp16_0, var_591_cast_fp16_0))[name = tensor<string, []>("aw_37_cast_fp16")];
+            tensor<string, []> aw_39_equation_0 = const()[name = tensor<string, []>("aw_39_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_39_cast_fp16 = einsum(equation = aw_39_equation_0, values = (var_599_cast_fp16_1, var_591_cast_fp16_1))[name = tensor<string, []>("aw_39_cast_fp16")];
+            tensor<string, []> aw_41_equation_0 = const()[name = tensor<string, []>("aw_41_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_41_cast_fp16 = einsum(equation = aw_41_equation_0, values = (var_599_cast_fp16_2, var_591_cast_fp16_2))[name = tensor<string, []>("aw_41_cast_fp16")];
+            tensor<string, []> aw_43_equation_0 = const()[name = tensor<string, []>("aw_43_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_43_cast_fp16 = einsum(equation = aw_43_equation_0, values = (var_599_cast_fp16_3, var_591_cast_fp16_3))[name = tensor<string, []>("aw_43_cast_fp16")];
+            tensor<string, []> aw_45_equation_0 = const()[name = tensor<string, []>("aw_45_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_45_cast_fp16 = einsum(equation = aw_45_equation_0, values = (var_599_cast_fp16_4, var_591_cast_fp16_4))[name = tensor<string, []>("aw_45_cast_fp16")];
+            tensor<string, []> aw_equation_0 = const()[name = tensor<string, []>("aw_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 1500, 1, 1500]> aw_cast_fp16 = einsum(equation = aw_equation_0, values = (var_599_cast_fp16_5, var_591_cast_fp16_5))[name = tensor<string, []>("aw_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_625_cast_fp16 = softmax(axis = var_539, x = aw_37_cast_fp16)[name = tensor<string, []>("op_625_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_626_cast_fp16 = softmax(axis = var_539, x = aw_39_cast_fp16)[name = tensor<string, []>("op_626_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_627_cast_fp16 = softmax(axis = var_539, x = aw_41_cast_fp16)[name = tensor<string, []>("op_627_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_628_cast_fp16 = softmax(axis = var_539, x = aw_43_cast_fp16)[name = tensor<string, []>("op_628_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_629_cast_fp16 = softmax(axis = var_539, x = aw_45_cast_fp16)[name = tensor<string, []>("op_629_cast_fp16")];
+            tensor<fp16, [1, 1500, 1, 1500]> var_630_cast_fp16 = softmax(axis = var_539, x = aw_cast_fp16)[name = tensor<string, []>("op_630_cast_fp16")];
+            tensor<string, []> var_632_equation_0 = const()[name = tensor<string, []>("op_632_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_632_cast_fp16 = einsum(equation = var_632_equation_0, values = (var_606_cast_fp16_0, var_625_cast_fp16))[name = tensor<string, []>("op_632_cast_fp16")];
+            tensor<string, []> var_634_equation_0 = const()[name = tensor<string, []>("op_634_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_634_cast_fp16 = einsum(equation = var_634_equation_0, values = (var_606_cast_fp16_1, var_626_cast_fp16))[name = tensor<string, []>("op_634_cast_fp16")];
+            tensor<string, []> var_636_equation_0 = const()[name = tensor<string, []>("op_636_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_636_cast_fp16 = einsum(equation = var_636_equation_0, values = (var_606_cast_fp16_2, var_627_cast_fp16))[name = tensor<string, []>("op_636_cast_fp16")];
+            tensor<string, []> var_638_equation_0 = const()[name = tensor<string, []>("op_638_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_638_cast_fp16 = einsum(equation = var_638_equation_0, values = (var_606_cast_fp16_3, var_628_cast_fp16))[name = tensor<string, []>("op_638_cast_fp16")];
+            tensor<string, []> var_640_equation_0 = const()[name = tensor<string, []>("op_640_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_640_cast_fp16 = einsum(equation = var_640_equation_0, values = (var_606_cast_fp16_4, var_629_cast_fp16))[name = tensor<string, []>("op_640_cast_fp16")];
+            tensor<string, []> var_642_equation_0 = const()[name = tensor<string, []>("op_642_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 64, 1, 1500]> var_642_cast_fp16 = einsum(equation = var_642_equation_0, values = (var_606_cast_fp16_5, var_630_cast_fp16))[name = tensor<string, []>("op_642_cast_fp16")];
+            tensor<bool, []> input_35_interleave_0 = const()[name = tensor<string, []>("input_35_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 1500]> input_35_cast_fp16 = concat(axis = var_539, interleave = input_35_interleave_0, values = (var_632_cast_fp16, var_634_cast_fp16, var_636_cast_fp16, var_638_cast_fp16, var_640_cast_fp16, var_642_cast_fp16))[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<string, []> var_651_pad_type_0 = const()[name = tensor<string, []>("op_651_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_651_strides_0 = const()[name = tensor<string, []>("op_651_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_651_pad_0 = const()[name = tensor<string, []>("op_651_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_651_dilations_0 = const()[name = tensor<string, []>("op_651_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_651_groups_0 = const()[name = tensor<string, []>("op_651_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 384, 1, 1]> blocks_3_attn_out_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_weight_to_fp16"), val = tensor<fp16, [384, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13758592)))];
+            tensor<fp16, [384]> blocks_3_attn_out_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_attn_out_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14053568)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_651_cast_fp16 = conv(bias = blocks_3_attn_out_bias_to_fp16, dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = blocks_3_attn_out_weight_to_fp16, x = input_35_cast_fp16)[name = tensor<string, []>("op_651_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = var_651_cast_fp16)[name = tensor<string, []>("inputs_15_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = tensor<string, []>("input_37_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> input_37_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_37_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14054400)))];
+            tensor<fp16, [384]> input_37_beta_0_to_fp16 = const()[name = tensor<string, []>("input_37_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14055232)))];
+            tensor<fp16, []> var_661_to_fp16 = const()[name = tensor<string, []>("op_661_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = input_37_beta_0_to_fp16, epsilon = var_661_to_fp16, gamma = input_37_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<string, []> input_39_pad_type_0 = const()[name = tensor<string, []>("input_39_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> input_39_strides_0 = const()[name = tensor<string, []>("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> input_39_pad_0 = const()[name = tensor<string, []>("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> input_39_dilations_0 = const()[name = tensor<string, []>("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> input_39_groups_0 = const()[name = tensor<string, []>("input_39_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1536, 384, 1, 1]> blocks_3_mlp_0_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_weight_to_fp16"), val = tensor<fp16, [1536, 384, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(14056064)))];
+            tensor<fp16, [1536]> blocks_3_mlp_0_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_0_bias_to_fp16"), val = tensor<fp16, [1536]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15235776)))];
+            tensor<fp16, [1, 1536, 1, 1500]> input_39_cast_fp16 = conv(bias = blocks_3_mlp_0_bias_to_fp16, dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = blocks_3_mlp_0_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 1500]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_39_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<string, []> var_687_pad_type_0 = const()[name = tensor<string, []>("op_687_pad_type_0"), val = tensor<string, []>("valid")];
+            tensor<int32, [2]> var_687_strides_0 = const()[name = tensor<string, []>("op_687_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_687_pad_0 = const()[name = tensor<string, []>("op_687_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_687_dilations_0 = const()[name = tensor<string, []>("op_687_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, []> var_687_groups_0 = const()[name = tensor<string, []>("op_687_groups_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [384, 1536, 1, 1]> blocks_3_mlp_2_weight_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_weight_to_fp16"), val = tensor<fp16, [384, 1536, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15238912)))];
+            tensor<fp16, [384]> blocks_3_mlp_2_bias_to_fp16 = const()[name = tensor<string, []>("blocks_3_mlp_2_bias_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16418624)))];
+            tensor<fp16, [1, 384, 1, 1500]> var_687_cast_fp16 = conv(bias = blocks_3_mlp_2_bias_to_fp16, dilations = var_687_dilations_0, groups = var_687_groups_0, pad = var_687_pad_0, pad_type = var_687_pad_type_0, strides = var_687_strides_0, weight = blocks_3_mlp_2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("op_687_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 1500]> inputs_cast_fp16 = add(x = inputs_15_cast_fp16, y = var_687_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [384]> x_gamma_0_to_fp16 = const()[name = tensor<string, []>("x_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16419456)))];
+            tensor<fp16, [384]> x_beta_0_to_fp16 = const()[name = tensor<string, []>("x_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16420288)))];
+            tensor<fp16, []> var_701_to_fp16 = const()[name = tensor<string, []>("op_701_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 1500]> x_cast_fp16 = layer_norm(axes = x_axes_0, beta = x_beta_0_to_fp16, epsilon = var_701_to_fp16, gamma = x_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [1]> var_712_axes_0 = const()[name = tensor<string, []>("op_712_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 384, 1500]> var_712_cast_fp16 = squeeze(axes = var_712_axes_0, x = x_cast_fp16)[name = tensor<string, []>("op_712_cast_fp16")];
+            tensor<int32, [3]> var_715_perm_0 = const()[name = tensor<string, []>("op_715_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<string, []> var_715_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_715_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp16, [1, 1500, 384]> var_715_cast_fp16 = transpose(perm = var_715_perm_0, x = var_712_cast_fp16)[name = tensor<string, []>("transpose_0")];
+            tensor<fp32, [1, 1500, 384]> output = cast(dtype = var_715_cast_fp16_to_fp32_dtype_0, x = var_715_cast_fp16)[name = tensor<string, []>("cast_19")];
+        } -> (output);
+}
\ No newline at end of file
diff --git a/tiny/ggml-tiny-encoder.mlmodelc/weights/weight.bin b/tiny/ggml-tiny-encoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..59aa32c2ef3eb1490176f23728b6407163bd8b1b
--- /dev/null
+++ b/tiny/ggml-tiny-encoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17aa929eb61d433fa68217d73a8aec7125af1b4afe39b5c8f27d61d80e2c1a80
+size 16421120
diff --git a/tiny/ggml-tiny.bin b/tiny/ggml-tiny.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d144f735b005ae8cbfa04a49e22fe40faa24dbec
--- /dev/null
+++ b/tiny/ggml-tiny.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21
+size 77691713