alexwengg committed on May 9

Commit

80af6a2

verified ·

1 Parent(s): 654eeea

Upload 269 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

iteration_3/README.md +109 -0
iteration_3/compiled/.DS_Store +0 -0
iteration_3/compiled/bert_fp16_t128.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t128.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t128.mlmodelc/metadata.json +94 -0
iteration_3/compiled/bert_fp16_t128.mlmodelc/model.mil +442 -0
iteration_3/compiled/bert_fp16_t128.mlmodelc/weights/weight.bin +3 -0
iteration_3/compiled/bert_fp16_t256.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t256.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t256.mlmodelc/metadata.json +94 -0
iteration_3/compiled/bert_fp16_t256.mlmodelc/model.mil +442 -0
iteration_3/compiled/bert_fp16_t256.mlmodelc/weights/weight.bin +3 -0
iteration_3/compiled/bert_fp16_t64.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t64.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/bert_fp16_t64.mlmodelc/metadata.json +94 -0
iteration_3/compiled/bert_fp16_t64.mlmodelc/model.mil +442 -0
iteration_3/compiled/bert_fp16_t64.mlmodelc/weights/weight.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/metadata.json +110 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/model.mil +0 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/weights/weight.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/metadata.json +110 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/model.mil +0 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/weights/weight.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/analytics/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/coremldata.bin +3 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/metadata.json +110 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/model.mil +0 -0
iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/weights/weight.bin +3 -0
iteration_3/packages/.DS_Store +0 -0
iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/packages/bert_fp16_t128.mlpackage/Manifest.json +18 -0
iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/packages/bert_fp16_t256.mlpackage/Manifest.json +18 -0
iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/packages/bert_fp16_t64.mlpackage/Manifest.json +18 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Manifest.json +18 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Manifest.json +18 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0

iteration_3/README.md CHANGED Viewed

@@ -113,3 +113,112 @@ python -m coreml.inference --fp32
 Other quantization tiers (int8 weight-only, int4 palettization) deferred
 to a future iteration — fp16 already pays for itself on disk and warm
 latency.

 Other quantization tiers (int8 weight-only, int4 palettization) deferred
 to a future iteration — fp16 already pays for itself on disk and warm
 latency.
+## Token-axis buckets (Trial 11)
+The `bert` and `fused_diffusion_sampler` packages reject `ct.RangeDim`
+on the token axis (HF Albert + cross-attn produce ops MIL refuses with
+"data-dependent shapes were disabled"). The default packages above
+hard-code T = 57, which caps prompts at ~37 chars.
+To support longer prompts without RangeDim, this iteration ships
+**three additional fixed-T variants** of each constrained stage:
+| File                                              | Compute      | Size  |
+|---------------------------------------------------|--------------|-------|
+| `bert_fp16_t64.mlpackage`                         | ALL          | 12 MB |
+| `bert_fp16_t128.mlpackage`                        | ALL          | 12 MB |
+| `bert_fp16_t256.mlpackage`                        | ALL          | 12 MB |
+| `fused_diffusion_sampler_fp16_t64.mlpackage`      | ALL          | 48 MB |
+| `fused_diffusion_sampler_fp16_t128.mlpackage`     | ALL          | 48 MB |
+| `fused_diffusion_sampler_fp16_t256.mlpackage`     | ALL          | 48 MB |
+| **Sub-total (extra over the 8 defaults)**         |              | **180 MB** |
+The original `bert_fp16.mlpackage` / `fused_diffusion_sampler_fp16.mlpackage`
+(T = 57) remain in the manifest as the default fast path — every
+sentence that fits T = 57 should keep using them. The bucketed variants
+are loaded on demand for longer prompts.
+Loader policy (Swift / Python):
+```
+real_n = #espeak tokens
+if   real_n <=  57: use *_fp16.mlpackage          (default)
+elif real_n <=  64: use *_fp16_t64.mlpackage
+elif real_n <= 128: use *_fp16_t128.mlpackage
+elif real_n <= 256: use *_fp16_t256.mlpackage
+else: error (extend the bucket ladder)
+```
+Pad the token + attention_mask tensors with zeros to the chosen
+bucket's T. `bert` honours `attention_mask`, so contamination at
+padded positions is bounded; the sampler attends to bert output, so
+it inherits the same masking.
+Per-bucket end-to-end inference verified by `coreml/inference_buckets.py
+--all` (writes `coreml/out_t{64,128,256}.wav`):
+| Bucket | Prompt                                     | Tokens | Audio  | Pipeline |
+|--------|--------------------------------------------|--------|--------|----------|
+| 64     | "Hello there. How are you today?"          | 36     | 2.42 s |  494 ms  |
+| 128    | "StyleTTS 2 is a text to speech model."    | 57     | 3.60 s |  414 ms  |
+| 256    | longer paragraph (see `inference_buckets.py`) | 154 | 8.37 s | 4933 ms  |
+At T = 256 the cost is dominated by `decoder_upsample`, which accounts
+for 4.5 s of the 4.9 s pipeline total (it runs CPU_ONLY and is
+real-time-ish for 8.4 s of 24 kHz output). The bucket swap itself costs
+only a few ms; the rest of the pipeline scales with output frame count,
+not bucket size.
+**Total iteration_3 footprint with buckets: ~451 MB** (~274 MB defaults
+plus ~180 MB buckets; sizes are rounded), or skip the T = 57 defaults
+entirely and ship only buckets to save ~60 MB.
+### Build / refresh the bucketed packages
+```bash
+cd models/tts/styletts2
+# Build buckets (writes to coreml/packages/, run once)
+uv run python coreml/build_buckets.py \
+    --buckets 64,128,256 --stages bert,sampler --precision fp16
+# Stage into iteration_3 + compile
+for T in 64 128 256; do
+  for stage in bert fused_diffusion_sampler; do
+    cp -R "coreml/packages/${stage}_fp16_t${T}.mlpackage" \
+          "iteration_3/packages/${stage}_fp16_t${T}.mlpackage"
+    xcrun coremlcompiler compile \
+      "iteration_3/packages/${stage}_fp16_t${T}.mlpackage" \
+      "iteration_3/compiled/"
+  done
+done
+# Validate
+uv run python coreml/inference_buckets.py --all --output-dir coreml
+```
+### HuggingFace upload manifest
+Upload the entire `iteration_3/packages/` tree (14 mlpackages):
+```
+iteration_3/packages/
+├── text_encoder_fp16.mlpackage
+├── bert_fp16.mlpackage                              ← T=57 default
+├── bert_fp16_t64.mlpackage                          ← bucket
+├── bert_fp16_t128.mlpackage                         ← bucket
+├── bert_fp16_t256.mlpackage                         ← bucket
+├── ref_encoder_fp16.mlpackage
+├── fused_diffusion_sampler_fp16.mlpackage           ← T=57 default
+├── fused_diffusion_sampler_fp16_t64.mlpackage       ← bucket
+├── fused_diffusion_sampler_fp16_t128.mlpackage      ← bucket
+├── fused_diffusion_sampler_fp16_t256.mlpackage      ← bucket
+├── duration_predictor_fp16.mlpackage
+├── fused_f0n_har_source.mlpackage                   ← fp32 (cumsum drift)
+├── decoder_pre_fp16.mlpackage
+└── decoder_upsample_fp16.mlpackage
+```
+Total: **451 MB** (13 fp16 packages + 1 fp32 package, the
+cumsum-sensitive `fused_f0n_har_source`). Compiled `.mlmodelc` siblings
+live next to the packages in `iteration_3/compiled/` — same file count,
+same total size.

iteration_3/compiled/.DS_Store CHANGED Viewed

Binary files a/iteration_3/compiled/.DS_Store and b/iteration_3/compiled/.DS_Store differ

iteration_3/compiled/bert_fp16_t128.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffbc105f1a1ce78756729151d8f8d6669f0dc418d5146ea32f26c26bb6fb555
+size 243

iteration_3/compiled/bert_fp16_t128.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:605c075757566cb93de9f4cb848a115ab2e586ab678d134e86fbb1d7646ea28b
+size 441

iteration_3/compiled/bert_fp16_t128.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,94 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 128 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 768]",
+        "name" : "sequence_output",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 512 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 512, 128]",
+        "name" : "var_1030",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 74,
+      "Ios18.scaledDotProductAttention" : 12,
+      "Ios18.sub" : 1,
+      "Select" : 2,
+      "Ios18.expandDims" : 2,
+      "Ios18.gelu" : 12,
+      "Ios18.gather" : 1,
+      "Ios18.add" : 27,
+      "Tile" : 1,
+      "Ios18.layerNorm" : 25,
+      "Ios18.transpose" : 49,
+      "Ios18.cast" : 5,
+      "Ios18.reshape" : 48,
+      "Ios18.greaterEqual" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 128]",
+        "name" : "tokens",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 128]",
+        "name" : "attention_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "bert_fp16_t128",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/bert_fp16_t128.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,442 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> tokens) {
+            int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
+            bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
+            string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1, 128]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
+            tensor<int32, [1, 128]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
+            tensor<bool, [1, 128]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
+            tensor<int32, [1, 128]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1, 128]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1, 128]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
+            tensor<fp16, [1, 128, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
+            tensor<fp16, [1, 128, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
+            tensor<fp16, [1, 128, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
+            tensor<fp16, [1, 128, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78528)))];
+            tensor<fp16, [1, 128, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111360)))];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111680)))];
+            fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 128, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1, 1, 128]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
+            tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1, 1, 1, 128]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
+            tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 128, 1])];
+            tensor<int32, [1, 1, 128, 128]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
+            fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
+            string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
+            tensor<fp16, [1, 1, 128, 128]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
+            tensor<fp16, [1, 1, 128, 128]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
+            string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
+            fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
+            tensor<bool, [1, 1, 128, 128]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
+            tensor<fp16, [1, 1, 128, 128]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
+            tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112000)))];
+            tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308672)))];
+            tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310272)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1489984)))];
+            tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1491584)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2671296)))];
+            tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2672896)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3852608)))];
+            tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
+            tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 128, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3854208)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5033920)))];
+            tensor<fp16, [1, 128, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5035520)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5037120)))];
+            fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
+            tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5038720)))];
+            tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8184512)))];
+            tensor<fp16, [1, 128, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8188672)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11334464)))];
+            tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
+            tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11336064)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11337664)))];
+            tensor<fp16, [1, 128, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
+            tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 128, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
+            tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
+            tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
+            tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 128, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
+            string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
+            tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
+            tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 128, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
+            tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
+            string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
+            tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
+            tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 128, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
+            tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
+            string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
+            tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
+            tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 128, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
+            string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
+            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
+            tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 128, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
+            tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
+            string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
+            tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
+            tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 128, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
+            string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
+            tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
+            tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
+            tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
+            string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
+            tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
+            tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 128, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
+            tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
+            string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
+            tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
+            tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 128, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
+            string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
+            tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 128, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 128, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 12, 128, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
+            tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 128, 768])];
+            tensor<fp16, [1, 128, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 128, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
+            tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
+            tensor<fp16, [1, 128, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
+            string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 128, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
+            tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 128, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
+            tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11339264)))];
+            tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12125760)))];
+            tensor<fp16, [1, 128, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
+            tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
+            tensor<fp16, [1, 512, 128]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
+        } -> (sequence_output, var_1030);
+}

iteration_3/compiled/bert_fp16_t128.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ff3ca8fac0332427ddfe5e78954382359d26516284113001a7484b60455eb10
+size 12126848

iteration_3/compiled/bert_fp16_t256.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd98e544fdd1d3002b708faa81092ba37b68369dfedee5cb0152a8340b68bdc
+size 243

iteration_3/compiled/bert_fp16_t256.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2aba3b0f5fbc0614df555dcbfd939baa534dece2cf898b67db6a99169e55875a
+size 441

iteration_3/compiled/bert_fp16_t256.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,94 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 256 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 256, 768]",
+        "name" : "sequence_output",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 512 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 512, 256]",
+        "name" : "var_1030",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 74,
+      "Ios18.scaledDotProductAttention" : 12,
+      "Ios18.sub" : 1,
+      "Select" : 2,
+      "Ios18.expandDims" : 2,
+      "Ios18.gelu" : 12,
+      "Ios18.gather" : 1,
+      "Ios18.add" : 27,
+      "Tile" : 1,
+      "Ios18.layerNorm" : 25,
+      "Ios18.transpose" : 49,
+      "Ios18.cast" : 5,
+      "Ios18.reshape" : 48,
+      "Ios18.greaterEqual" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 256]",
+        "name" : "tokens",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 256]",
+        "name" : "attention_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "bert_fp16_t256",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/bert_fp16_t256.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,442 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1, 256]> attention_mask, tensor<int32, [1, 256]> tokens) {
+            int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
+            bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
+            string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1, 256]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
+            tensor<int32, [1, 256]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
+            tensor<bool, [1, 256]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
+            tensor<int32, [1, 256]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1, 256]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1, 256]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
+            tensor<fp16, [1, 256, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
+            tensor<fp16, [1, 256, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 256, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
+            tensor<fp16, [1, 256, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
+            tensor<fp16, [1, 256, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 256, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111296)))];
+            tensor<fp16, [1, 256, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(176896)))];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177216)))];
+            fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 256, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1, 1, 256]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
+            tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1, 1, 1, 256]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
+            tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 256, 1])];
+            tensor<int32, [1, 1, 256, 256]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
+            fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
+            string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
+            tensor<fp16, [1, 1, 256, 256]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
+            tensor<fp16, [1, 1, 256, 256]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
+            string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
+            fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
+            tensor<bool, [1, 1, 256, 256]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
+            tensor<fp16, [1, 1, 256, 256]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
+            tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177536)))];
+            tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374208)))];
+            tensor<fp16, [1, 256, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375808)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1555520)))];
+            tensor<fp16, [1, 256, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1557120)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2736832)))];
+            tensor<fp16, [1, 256, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2738432)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3918144)))];
+            tensor<fp16, [1, 256, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
+            tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 256, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3919744)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5099456)))];
+            tensor<fp16, [1, 256, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5101056)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5102656)))];
+            fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 256, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
+            tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5104256)))];
+            tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8250048)))];
+            tensor<fp16, [1, 256, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8254208)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11400000)))];
+            tensor<fp16, [1, 256, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
+            tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11401600)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11403200)))];
+            tensor<fp16, [1, 256, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
+            tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 256, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
+            tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
+            tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
+            tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 256, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
+            string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
+            tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
+            tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 256, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
+            tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
+            string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
+            tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
+            tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 256, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
+            tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
+            string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
+            tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
+            tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 256, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
+            string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
+            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
+            tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 256, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
+            tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
+            string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
+            tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
+            tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 256, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
+            string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
+            tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
+            tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 256, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
+            tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
+            string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
+            tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
+            tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 256, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
+            tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
+            string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
+            tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
+            tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 256, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
+            string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
+            tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 256, 12, 64])];
+            tensor<fp16, [1, 256, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 256, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 12, 256, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 12, 256, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
+            tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 256, 768])];
+            tensor<fp16, [1, 256, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 256, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
+            tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
+            tensor<fp16, [1, 256, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
+            string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 256, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1, 256, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
+            tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 256, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
+            tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11404800)))];
+            tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12191296)))];
+            tensor<fp16, [1, 256, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
+            tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
+            tensor<fp16, [1, 512, 256]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
+        } -> (sequence_output, var_1030);
+}

iteration_3/compiled/bert_fp16_t256.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7569b53a68b9664e246fda171851daa1dd5f01f64aa31533dfcaf40f4034fee3
+size 12192384

iteration_3/compiled/bert_fp16_t64.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fff70f6d02af452c0cfe9133a60c0ddaea7da51bca82b19885aaf90d49533955
+size 243

iteration_3/compiled/bert_fp16_t64.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ab9e12f625df7bfa9daa01c7c3a319d095408cf032b482e2f6733748de5e850
+size 437

iteration_3/compiled/bert_fp16_t64.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,94 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 64 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 768]",
+        "name" : "sequence_output",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 512 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 512, 64]",
+        "name" : "var_1030",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.linear" : 74,
+      "Ios18.scaledDotProductAttention" : 12,
+      "Ios18.sub" : 1,
+      "Select" : 2,
+      "Ios18.expandDims" : 2,
+      "Ios18.gelu" : 12,
+      "Ios18.gather" : 1,
+      "Ios18.add" : 27,
+      "Tile" : 1,
+      "Ios18.layerNorm" : 25,
+      "Ios18.transpose" : 49,
+      "Ios18.cast" : 5,
+      "Ios18.reshape" : 48,
+      "Ios18.greaterEqual" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 64]",
+        "name" : "tokens",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 64]",
+        "name" : "attention_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "bert_fp16_t64",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/bert_fp16_t64.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,442 @@

+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}, {"coremltools-component-torch", "2.11.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios18>(tensor<int32, [1, 64]> attention_mask, tensor<int32, [1, 64]> tokens) {
+            int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
+            bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [178, 128]> bert_embeddings_word_embeddings_weight_to_fp16 = const()[name = string("bert_embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [178, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            string tokens_to_int16_dtype_0 = const()[name = string("tokens_to_int16_dtype_0"), val = string("int16")];
+            string cast_53_dtype_0 = const()[name = string("cast_53_dtype_0"), val = string("int32")];
+            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
+            tensor<int16, [1, 64]> tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = string("cast_58")];
+            tensor<int32, [1, 64]> cast_53 = cast(dtype = cast_53_dtype_0, x = tokens_to_int16)[name = string("cast_57")];
+            tensor<bool, [1, 64]> greater_equal_0 = greater_equal(x = cast_53, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
+            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(178)];
+            tensor<int32, [1, 64]> add_0 = add(x = cast_53, y = slice_by_index_0)[name = string("add_0")];
+            tensor<int32, [1, 64]> select_0 = select(a = cast_53, b = add_0, cond = greater_equal_0)[name = string("select_0")];
+            int32 inputs_embeds_cast_fp16_cast_uint16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_cast_uint16_axis_0"), val = int32(0)];
+            string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")];
+            tensor<int16, [1, 64]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_56")];
+            tensor<fp16, [1, 64, 128]> inputs_embeds_cast_fp16_cast_uint16_cast_uint16 = gather(axis = inputs_embeds_cast_fp16_cast_uint16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_to_int16, validate_indices = inputs_embeds_validate_indices_0, x = bert_embeddings_word_embeddings_weight_to_fp16)[name = string("inputs_embeds_cast_fp16_cast_uint16_cast_uint16")];
+            tensor<fp16, [1, 64, 128]> token_type_embeddings_1_to_fp16 = const()[name = string("token_type_embeddings_1_to_fp16"), val = tensor<fp16, [1, 64, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45696)))];
+            tensor<fp16, [1, 64, 128]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16_cast_uint16_cast_uint16, y = token_type_embeddings_1_to_fp16)[name = string("embeddings_1_cast_fp16")];
+            tensor<fp16, [1, 64, 128]> position_embeddings_1_to_fp16 = const()[name = string("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 64, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62144)))];
+            tensor<fp16, [1, 64, 128]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_to_fp16)[name = string("input_5_cast_fp16")];
+            tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78592)))];
+            tensor<fp16, [128]> bert_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("bert_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78912)))];
+            fp16 var_34_to_fp16 = const()[name = string("op_34_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 64, 128]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = bert_embeddings_LayerNorm_bias_to_fp16, epsilon = var_34_to_fp16, gamma = bert_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            tensor<int32, [1]> var_79_axes_0 = const()[name = string("op_79_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1, 1, 64]> var_79 = expand_dims(axes = var_79_axes_0, x = attention_mask)[name = string("op_79")];
+            tensor<int32, [1]> var_81_axes_0 = const()[name = string("op_81_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1, 1, 1, 64]> var_81 = expand_dims(axes = var_81_axes_0, x = var_79)[name = string("op_81")];
+            tensor<int32, [4]> var_90_reps_0 = const()[name = string("op_90_reps_0"), val = tensor<int32, [4]>([1, 1, 64, 1])];
+            tensor<int32, [1, 1, 64, 64]> var_90 = tile(reps = var_90_reps_0, x = var_81)[name = string("op_90")];
+            fp16 var_96_to_fp16 = const()[name = string("op_96_to_fp16"), val = fp16(0x1p+0)];
+            string var_95_to_fp16_dtype_0 = const()[name = string("op_95_to_fp16_dtype_0"), val = string("fp16")];
+            tensor<fp16, [1, 1, 64, 64]> var_90_to_fp16 = cast(dtype = var_95_to_fp16_dtype_0, x = var_90)[name = string("cast_55")];
+            tensor<fp16, [1, 1, 64, 64]> inverted_mask_cast_fp16 = sub(x = var_96_to_fp16, y = var_90_to_fp16)[name = string("inverted_mask_cast_fp16")];
+            string var_103_dtype_0 = const()[name = string("op_103_dtype_0"), val = string("bool")];
+            fp16 var_104_to_fp16 = const()[name = string("op_104_to_fp16"), val = fp16(-inf)];
+            tensor<bool, [1, 1, 64, 64]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_103_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_54")];
+            tensor<fp16, [1, 1, 64, 64]> attention_mask_cast_fp16 = select(a = var_104_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
+            tensor<fp16, [768, 128]> bert_encoder_embedding_hidden_mapping_in_weight_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_weight_to_fp16"), val = tensor<fp16, [768, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79232)))];
+            tensor<fp16, [768]> bert_encoder_embedding_hidden_mapping_in_bias_to_fp16 = const()[name = string("bert_encoder_embedding_hidden_mapping_in_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275904)))];
+            tensor<fp16, [1, 64, 768]> linear_0_cast_fp16 = linear(bias = bert_encoder_embedding_hidden_mapping_in_bias_to_fp16, weight = bert_encoder_embedding_hidden_mapping_in_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277504)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1457216)))];
+            tensor<fp16, [1, 64, 768]> linear_1_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_1_cast_fp16")];
+            tensor<int32, [4]> var_143 = const()[name = string("op_143"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_3_cast_fp16 = reshape(shape = var_143, x = linear_1_cast_fp16)[name = string("x_3_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1458816)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2638528)))];
+            tensor<fp16, [1, 64, 768]> linear_2_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_152 = const()[name = string("op_152"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_7_cast_fp16 = reshape(shape = var_152, x = linear_2_cast_fp16)[name = string("x_7_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(2640128)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3819840)))];
+            tensor<fp16, [1, 64, 768]> linear_3_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = linear_0_cast_fp16)[name = string("linear_3_cast_fp16")];
+            tensor<int32, [4]> var_161 = const()[name = string("op_161"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_11_cast_fp16 = reshape(shape = var_161, x = linear_3_cast_fp16)[name = string("x_11_cast_fp16")];
+            tensor<int32, [4]> transpose_72_perm_0 = const()[name = string("transpose_72_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_73_perm_0 = const()[name = string("transpose_73_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_74_perm_0 = const()[name = string("transpose_74_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_74 = transpose(perm = transpose_74_perm_0, x = x_11_cast_fp16)[name = string("transpose_154")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_73 = transpose(perm = transpose_73_perm_0, x = x_7_cast_fp16)[name = string("transpose_155")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_72 = transpose(perm = transpose_72_perm_0, x = x_3_cast_fp16)[name = string("transpose_156")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_73, query = transpose_72, value = transpose_74)[name = string("attention_output_1_cast_fp16")];
+            tensor<int32, [4]> attention_output_3_perm_0 = const()[name = string("attention_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_3_cast_fp16 = transpose(perm = attention_output_3_perm_0, x = attention_output_1_cast_fp16)[name = string("transpose_153")];
+            tensor<fp16, [1, 64, 768]> input_9_cast_fp16 = reshape(shape = var_167, x = attention_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            tensor<fp16, [768, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3821440)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5001152)))];
+            tensor<fp16, [1, 64, 768]> linear_4_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_9_cast_fp16)[name = string("linear_4_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_11_cast_fp16 = add(x = linear_0_cast_fp16, y = linear_4_cast_fp16)[name = string("input_11_cast_fp16")];
+            tensor<int32, [1]> input_13_axes_0 = const()[name = string("input_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5002752)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5004352)))];
+            fp16 var_118_to_fp16 = const()[name = string("op_118_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [1, 64, 768]> input_13_cast_fp16 = layer_norm(axes = input_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_11_cast_fp16)[name = string("input_13_cast_fp16")];
+            tensor<fp16, [2048, 768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16"), val = tensor<fp16, [2048, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(5005952)))];
+            tensor<fp16, [2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16"), val = tensor<fp16, [2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8151744)))];
+            tensor<fp16, [1, 64, 2048]> linear_5_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_13_cast_fp16)[name = string("linear_5_cast_fp16")];
+            string input_17_mode_0 = const()[name = string("input_17_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_17_cast_fp16 = gelu(mode = input_17_mode_0, x = linear_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            tensor<fp16, [768, 2048]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16"), val = tensor<fp16, [768, 2048]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8155904)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11301696)))];
+            tensor<fp16, [1, 64, 768]> linear_6_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_17_cast_fp16)[name = string("linear_6_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_19_cast_fp16 = add(x = linear_6_cast_fp16, y = input_13_cast_fp16)[name = string("input_19_cast_fp16")];
+            tensor<int32, [1]> hidden_states_3_axes_0 = const()[name = string("hidden_states_3_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11303296)))];
+            tensor<fp16, [768]> bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16 = const()[name = string("bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11304896)))];
+            tensor<fp16, [1, 64, 768]> hidden_states_3_cast_fp16 = layer_norm(axes = hidden_states_3_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_19_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_7_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_7_cast_fp16")];
+            tensor<int32, [4]> var_218 = const()[name = string("op_218"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_15_cast_fp16 = reshape(shape = var_218, x = linear_7_cast_fp16)[name = string("x_15_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_8_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_19_cast_fp16 = reshape(shape = var_227, x = linear_8_cast_fp16)[name = string("x_19_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_9_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_3_cast_fp16)[name = string("linear_9_cast_fp16")];
+            tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_23_cast_fp16 = reshape(shape = var_236, x = linear_9_cast_fp16)[name = string("x_23_cast_fp16")];
+            tensor<int32, [4]> transpose_75_perm_0 = const()[name = string("transpose_75_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_76_perm_0 = const()[name = string("transpose_76_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_77_perm_0 = const()[name = string("transpose_77_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_77 = transpose(perm = transpose_77_perm_0, x = x_23_cast_fp16)[name = string("transpose_150")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_76 = transpose(perm = transpose_76_perm_0, x = x_19_cast_fp16)[name = string("transpose_151")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_75 = transpose(perm = transpose_75_perm_0, x = x_15_cast_fp16)[name = string("transpose_152")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_76, query = transpose_75, value = transpose_77)[name = string("attention_output_5_cast_fp16")];
+            tensor<int32, [4]> attention_output_7_perm_0 = const()[name = string("attention_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_242 = const()[name = string("op_242"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_7_cast_fp16 = transpose(perm = attention_output_7_perm_0, x = attention_output_5_cast_fp16)[name = string("transpose_149")];
+            tensor<fp16, [1, 64, 768]> input_21_cast_fp16 = reshape(shape = var_242, x = attention_output_7_cast_fp16)[name = string("input_21_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_10_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_21_cast_fp16)[name = string("linear_10_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_23_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = linear_10_cast_fp16)[name = string("input_23_cast_fp16")];
+            tensor<int32, [1]> input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_11_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_25_cast_fp16)[name = string("linear_11_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = linear_11_cast_fp16)[name = string("input_29_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_12_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_29_cast_fp16)[name = string("linear_12_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_31_cast_fp16 = add(x = linear_12_cast_fp16, y = input_25_cast_fp16)[name = string("input_31_cast_fp16")];
+            tensor<int32, [1]> hidden_states_5_axes_0 = const()[name = string("hidden_states_5_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_5_cast_fp16 = layer_norm(axes = hidden_states_5_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_13_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_13_cast_fp16")];
+            tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_27_cast_fp16 = reshape(shape = var_293, x = linear_13_cast_fp16)[name = string("x_27_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_14_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_302 = const()[name = string("op_302"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_31_cast_fp16 = reshape(shape = var_302, x = linear_14_cast_fp16)[name = string("x_31_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_15_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_5_cast_fp16)[name = string("linear_15_cast_fp16")];
+            tensor<int32, [4]> var_311 = const()[name = string("op_311"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_35_cast_fp16 = reshape(shape = var_311, x = linear_15_cast_fp16)[name = string("x_35_cast_fp16")];
+            tensor<int32, [4]> transpose_78_perm_0 = const()[name = string("transpose_78_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_79_perm_0 = const()[name = string("transpose_79_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_80_perm_0 = const()[name = string("transpose_80_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_80 = transpose(perm = transpose_80_perm_0, x = x_35_cast_fp16)[name = string("transpose_146")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_79 = transpose(perm = transpose_79_perm_0, x = x_31_cast_fp16)[name = string("transpose_147")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_78 = transpose(perm = transpose_78_perm_0, x = x_27_cast_fp16)[name = string("transpose_148")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_79, query = transpose_78, value = transpose_80)[name = string("attention_output_9_cast_fp16")];
+            tensor<int32, [4]> attention_output_11_perm_0 = const()[name = string("attention_output_11_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_317 = const()[name = string("op_317"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_11_cast_fp16 = transpose(perm = attention_output_11_perm_0, x = attention_output_9_cast_fp16)[name = string("transpose_145")];
+            tensor<fp16, [1, 64, 768]> input_33_cast_fp16 = reshape(shape = var_317, x = attention_output_11_cast_fp16)[name = string("input_33_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_16_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_33_cast_fp16)[name = string("linear_16_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_35_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = linear_16_cast_fp16)[name = string("input_35_cast_fp16")];
+            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_37_cast_fp16 = layer_norm(axes = input_37_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("input_37_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_17_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_37_cast_fp16)[name = string("linear_17_cast_fp16")];
+            string input_41_mode_0 = const()[name = string("input_41_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_41_cast_fp16 = gelu(mode = input_41_mode_0, x = linear_17_cast_fp16)[name = string("input_41_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_18_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_41_cast_fp16)[name = string("linear_18_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_43_cast_fp16 = add(x = linear_18_cast_fp16, y = input_37_cast_fp16)[name = string("input_43_cast_fp16")];
+            tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_43_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_19_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_19_cast_fp16")];
+            tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_39_cast_fp16 = reshape(shape = var_368, x = linear_19_cast_fp16)[name = string("x_39_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_20_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_377 = const()[name = string("op_377"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_43_cast_fp16 = reshape(shape = var_377, x = linear_20_cast_fp16)[name = string("x_43_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_21_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_21_cast_fp16")];
+            tensor<int32, [4]> var_386 = const()[name = string("op_386"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_47_cast_fp16 = reshape(shape = var_386, x = linear_21_cast_fp16)[name = string("x_47_cast_fp16")];
+            tensor<int32, [4]> transpose_81_perm_0 = const()[name = string("transpose_81_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_82_perm_0 = const()[name = string("transpose_82_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_83_perm_0 = const()[name = string("transpose_83_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_83 = transpose(perm = transpose_83_perm_0, x = x_47_cast_fp16)[name = string("transpose_142")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_82 = transpose(perm = transpose_82_perm_0, x = x_43_cast_fp16)[name = string("transpose_143")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_81 = transpose(perm = transpose_81_perm_0, x = x_39_cast_fp16)[name = string("transpose_144")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_82, query = transpose_81, value = transpose_83)[name = string("attention_output_13_cast_fp16")];
+            tensor<int32, [4]> attention_output_15_perm_0 = const()[name = string("attention_output_15_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_392 = const()[name = string("op_392"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_15_cast_fp16 = transpose(perm = attention_output_15_perm_0, x = attention_output_13_cast_fp16)[name = string("transpose_141")];
+            tensor<fp16, [1, 64, 768]> input_45_cast_fp16 = reshape(shape = var_392, x = attention_output_15_cast_fp16)[name = string("input_45_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_22_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_45_cast_fp16)[name = string("linear_22_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_47_cast_fp16 = add(x = hidden_states_7_cast_fp16, y = linear_22_cast_fp16)[name = string("input_47_cast_fp16")];
+            tensor<int32, [1]> input_49_axes_0 = const()[name = string("input_49_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_49_cast_fp16 = layer_norm(axes = input_49_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_23_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_49_cast_fp16)[name = string("linear_23_cast_fp16")];
+            string input_53_mode_0 = const()[name = string("input_53_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_53_cast_fp16 = gelu(mode = input_53_mode_0, x = linear_23_cast_fp16)[name = string("input_53_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_24_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_53_cast_fp16)[name = string("linear_24_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_55_cast_fp16 = add(x = linear_24_cast_fp16, y = input_49_cast_fp16)[name = string("input_55_cast_fp16")];
+            tensor<int32, [1]> hidden_states_9_axes_0 = const()[name = string("hidden_states_9_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_55_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_25_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_25_cast_fp16")];
+            tensor<int32, [4]> var_443 = const()[name = string("op_443"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_51_cast_fp16 = reshape(shape = var_443, x = linear_25_cast_fp16)[name = string("x_51_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_26_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_55_cast_fp16 = reshape(shape = var_452, x = linear_26_cast_fp16)[name = string("x_55_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_27_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = string("linear_27_cast_fp16")];
+            tensor<int32, [4]> var_461 = const()[name = string("op_461"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_59_cast_fp16 = reshape(shape = var_461, x = linear_27_cast_fp16)[name = string("x_59_cast_fp16")];
+            tensor<int32, [4]> transpose_84_perm_0 = const()[name = string("transpose_84_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_85_perm_0 = const()[name = string("transpose_85_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_86_perm_0 = const()[name = string("transpose_86_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_86 = transpose(perm = transpose_86_perm_0, x = x_59_cast_fp16)[name = string("transpose_138")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_85 = transpose(perm = transpose_85_perm_0, x = x_55_cast_fp16)[name = string("transpose_139")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_84 = transpose(perm = transpose_84_perm_0, x = x_51_cast_fp16)[name = string("transpose_140")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_85, query = transpose_84, value = transpose_86)[name = string("attention_output_17_cast_fp16")];
+            tensor<int32, [4]> attention_output_19_perm_0 = const()[name = string("attention_output_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_467 = const()[name = string("op_467"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_19_cast_fp16 = transpose(perm = attention_output_19_perm_0, x = attention_output_17_cast_fp16)[name = string("transpose_137")];
+            tensor<fp16, [1, 64, 768]> input_57_cast_fp16 = reshape(shape = var_467, x = attention_output_19_cast_fp16)[name = string("input_57_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_28_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_28_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_59_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_28_cast_fp16)[name = string("input_59_cast_fp16")];
+            tensor<int32, [1]> input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_29_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_61_cast_fp16)[name = string("linear_29_cast_fp16")];
+            string input_65_mode_0 = const()[name = string("input_65_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_65_cast_fp16 = gelu(mode = input_65_mode_0, x = linear_29_cast_fp16)[name = string("input_65_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_30_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_65_cast_fp16)[name = string("linear_30_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_67_cast_fp16 = add(x = linear_30_cast_fp16, y = input_61_cast_fp16)[name = string("input_67_cast_fp16")];
+            tensor<int32, [1]> hidden_states_11_axes_0 = const()[name = string("hidden_states_11_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_31_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_31_cast_fp16")];
+            tensor<int32, [4]> var_518 = const()[name = string("op_518"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_63_cast_fp16 = reshape(shape = var_518, x = linear_31_cast_fp16)[name = string("x_63_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_32_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_527 = const()[name = string("op_527"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_67_cast_fp16 = reshape(shape = var_527, x = linear_32_cast_fp16)[name = string("x_67_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_33_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = string("linear_33_cast_fp16")];
+            tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_71_cast_fp16 = reshape(shape = var_536, x = linear_33_cast_fp16)[name = string("x_71_cast_fp16")];
+            tensor<int32, [4]> transpose_87_perm_0 = const()[name = string("transpose_87_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_88_perm_0 = const()[name = string("transpose_88_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_89_perm_0 = const()[name = string("transpose_89_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_89 = transpose(perm = transpose_89_perm_0, x = x_71_cast_fp16)[name = string("transpose_134")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_88 = transpose(perm = transpose_88_perm_0, x = x_67_cast_fp16)[name = string("transpose_135")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_87 = transpose(perm = transpose_87_perm_0, x = x_63_cast_fp16)[name = string("transpose_136")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_88, query = transpose_87, value = transpose_89)[name = string("attention_output_21_cast_fp16")];
+            tensor<int32, [4]> attention_output_23_perm_0 = const()[name = string("attention_output_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_542 = const()[name = string("op_542"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_23_cast_fp16 = transpose(perm = attention_output_23_perm_0, x = attention_output_21_cast_fp16)[name = string("transpose_133")];
+            tensor<fp16, [1, 64, 768]> input_69_cast_fp16 = reshape(shape = var_542, x = attention_output_23_cast_fp16)[name = string("input_69_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_34_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_69_cast_fp16)[name = string("linear_34_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_71_cast_fp16 = add(x = hidden_states_11_cast_fp16, y = linear_34_cast_fp16)[name = string("input_71_cast_fp16")];
+            tensor<int32, [1]> input_73_axes_0 = const()[name = string("input_73_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("input_73_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_35_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_73_cast_fp16)[name = string("linear_35_cast_fp16")];
+            string input_77_mode_0 = const()[name = string("input_77_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_77_cast_fp16 = gelu(mode = input_77_mode_0, x = linear_35_cast_fp16)[name = string("input_77_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_36_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_77_cast_fp16)[name = string("linear_36_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_79_cast_fp16 = add(x = linear_36_cast_fp16, y = input_73_cast_fp16)[name = string("input_79_cast_fp16")];
+            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_79_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_37_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_37_cast_fp16")];
+            tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_75_cast_fp16 = reshape(shape = var_593, x = linear_37_cast_fp16)[name = string("x_75_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_38_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_38_cast_fp16")];
+            tensor<int32, [4]> var_602 = const()[name = string("op_602"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_79_cast_fp16 = reshape(shape = var_602, x = linear_38_cast_fp16)[name = string("x_79_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_39_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_13_cast_fp16)[name = string("linear_39_cast_fp16")];
+            tensor<int32, [4]> var_611 = const()[name = string("op_611"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_83_cast_fp16 = reshape(shape = var_611, x = linear_39_cast_fp16)[name = string("x_83_cast_fp16")];
+            tensor<int32, [4]> transpose_90_perm_0 = const()[name = string("transpose_90_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_91_perm_0 = const()[name = string("transpose_91_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_92_perm_0 = const()[name = string("transpose_92_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_92 = transpose(perm = transpose_92_perm_0, x = x_83_cast_fp16)[name = string("transpose_130")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_91 = transpose(perm = transpose_91_perm_0, x = x_79_cast_fp16)[name = string("transpose_131")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_90 = transpose(perm = transpose_90_perm_0, x = x_75_cast_fp16)[name = string("transpose_132")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_91, query = transpose_90, value = transpose_92)[name = string("attention_output_25_cast_fp16")];
+            tensor<int32, [4]> attention_output_27_perm_0 = const()[name = string("attention_output_27_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_617 = const()[name = string("op_617"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_27_cast_fp16 = transpose(perm = attention_output_27_perm_0, x = attention_output_25_cast_fp16)[name = string("transpose_129")];
+            tensor<fp16, [1, 64, 768]> input_81_cast_fp16 = reshape(shape = var_617, x = attention_output_27_cast_fp16)[name = string("input_81_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_40_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_81_cast_fp16)[name = string("linear_40_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_83_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = linear_40_cast_fp16)[name = string("input_83_cast_fp16")];
+            tensor<int32, [1]> input_85_axes_0 = const()[name = string("input_85_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_85_cast_fp16 = layer_norm(axes = input_85_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_83_cast_fp16)[name = string("input_85_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_41_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_85_cast_fp16)[name = string("linear_41_cast_fp16")];
+            string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = linear_41_cast_fp16)[name = string("input_89_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_42_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_89_cast_fp16)[name = string("linear_42_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_91_cast_fp16 = add(x = linear_42_cast_fp16, y = input_85_cast_fp16)[name = string("input_91_cast_fp16")];
+            tensor<int32, [1]> hidden_states_15_axes_0 = const()[name = string("hidden_states_15_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_15_cast_fp16 = layer_norm(axes = hidden_states_15_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_91_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_43_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_43_cast_fp16")];
+            tensor<int32, [4]> var_668 = const()[name = string("op_668"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_87_cast_fp16 = reshape(shape = var_668, x = linear_43_cast_fp16)[name = string("x_87_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_44_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_44_cast_fp16")];
+            tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_91_cast_fp16 = reshape(shape = var_677, x = linear_44_cast_fp16)[name = string("x_91_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_45_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_15_cast_fp16)[name = string("linear_45_cast_fp16")];
+            tensor<int32, [4]> var_686 = const()[name = string("op_686"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_95_cast_fp16 = reshape(shape = var_686, x = linear_45_cast_fp16)[name = string("x_95_cast_fp16")];
+            tensor<int32, [4]> transpose_93_perm_0 = const()[name = string("transpose_93_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_94_perm_0 = const()[name = string("transpose_94_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_95_perm_0 = const()[name = string("transpose_95_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_95 = transpose(perm = transpose_95_perm_0, x = x_95_cast_fp16)[name = string("transpose_126")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_94 = transpose(perm = transpose_94_perm_0, x = x_91_cast_fp16)[name = string("transpose_127")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_93 = transpose(perm = transpose_93_perm_0, x = x_87_cast_fp16)[name = string("transpose_128")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_94, query = transpose_93, value = transpose_95)[name = string("attention_output_29_cast_fp16")];
+            tensor<int32, [4]> attention_output_31_perm_0 = const()[name = string("attention_output_31_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_692 = const()[name = string("op_692"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_31_cast_fp16 = transpose(perm = attention_output_31_perm_0, x = attention_output_29_cast_fp16)[name = string("transpose_125")];
+            tensor<fp16, [1, 64, 768]> input_93_cast_fp16 = reshape(shape = var_692, x = attention_output_31_cast_fp16)[name = string("input_93_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_46_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_93_cast_fp16)[name = string("linear_46_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_95_cast_fp16 = add(x = hidden_states_15_cast_fp16, y = linear_46_cast_fp16)[name = string("input_95_cast_fp16")];
+            tensor<int32, [1]> input_97_axes_0 = const()[name = string("input_97_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_97_cast_fp16 = layer_norm(axes = input_97_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_95_cast_fp16)[name = string("input_97_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_47_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_97_cast_fp16)[name = string("linear_47_cast_fp16")];
+            string input_101_mode_0 = const()[name = string("input_101_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_101_cast_fp16 = gelu(mode = input_101_mode_0, x = linear_47_cast_fp16)[name = string("input_101_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_48_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_101_cast_fp16)[name = string("linear_48_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_103_cast_fp16 = add(x = linear_48_cast_fp16, y = input_97_cast_fp16)[name = string("input_103_cast_fp16")];
+            tensor<int32, [1]> hidden_states_17_axes_0 = const()[name = string("hidden_states_17_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_17_cast_fp16 = layer_norm(axes = hidden_states_17_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_49_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_49_cast_fp16")];
+            tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_99_cast_fp16 = reshape(shape = var_743, x = linear_49_cast_fp16)[name = string("x_99_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_50_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_50_cast_fp16")];
+            tensor<int32, [4]> var_752 = const()[name = string("op_752"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_103_cast_fp16 = reshape(shape = var_752, x = linear_50_cast_fp16)[name = string("x_103_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_51_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_17_cast_fp16)[name = string("linear_51_cast_fp16")];
+            tensor<int32, [4]> var_761 = const()[name = string("op_761"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_107_cast_fp16 = reshape(shape = var_761, x = linear_51_cast_fp16)[name = string("x_107_cast_fp16")];
+            tensor<int32, [4]> transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_98 = transpose(perm = transpose_98_perm_0, x = x_107_cast_fp16)[name = string("transpose_122")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_97 = transpose(perm = transpose_97_perm_0, x = x_103_cast_fp16)[name = string("transpose_123")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = x_99_cast_fp16)[name = string("transpose_124")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_97, query = transpose_96, value = transpose_98)[name = string("attention_output_33_cast_fp16")];
+            tensor<int32, [4]> attention_output_35_perm_0 = const()[name = string("attention_output_35_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_767 = const()[name = string("op_767"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_35_cast_fp16 = transpose(perm = attention_output_35_perm_0, x = attention_output_33_cast_fp16)[name = string("transpose_121")];
+            tensor<fp16, [1, 64, 768]> input_105_cast_fp16 = reshape(shape = var_767, x = attention_output_35_cast_fp16)[name = string("input_105_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_52_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_105_cast_fp16)[name = string("linear_52_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_107_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_52_cast_fp16)[name = string("input_107_cast_fp16")];
+            tensor<int32, [1]> input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_53_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_109_cast_fp16)[name = string("linear_53_cast_fp16")];
+            string input_113_mode_0 = const()[name = string("input_113_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_113_cast_fp16 = gelu(mode = input_113_mode_0, x = linear_53_cast_fp16)[name = string("input_113_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_54_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_113_cast_fp16)[name = string("linear_54_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_115_cast_fp16 = add(x = linear_54_cast_fp16, y = input_109_cast_fp16)[name = string("input_115_cast_fp16")];
+            tensor<int32, [1]> hidden_states_19_axes_0 = const()[name = string("hidden_states_19_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_19_cast_fp16 = layer_norm(axes = hidden_states_19_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_115_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_55_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_55_cast_fp16")];
+            tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_111_cast_fp16 = reshape(shape = var_818, x = linear_55_cast_fp16)[name = string("x_111_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_56_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_56_cast_fp16")];
+            tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_115_cast_fp16 = reshape(shape = var_827, x = linear_56_cast_fp16)[name = string("x_115_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_57_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_19_cast_fp16)[name = string("linear_57_cast_fp16")];
+            tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_119_cast_fp16 = reshape(shape = var_836, x = linear_57_cast_fp16)[name = string("x_119_cast_fp16")];
+            tensor<int32, [4]> transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_101 = transpose(perm = transpose_101_perm_0, x = x_119_cast_fp16)[name = string("transpose_118")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = x_115_cast_fp16)[name = string("transpose_119")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_99 = transpose(perm = transpose_99_perm_0, x = x_111_cast_fp16)[name = string("transpose_120")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_100, query = transpose_99, value = transpose_101)[name = string("attention_output_37_cast_fp16")];
+            tensor<int32, [4]> attention_output_39_perm_0 = const()[name = string("attention_output_39_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_842 = const()[name = string("op_842"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_39_cast_fp16 = transpose(perm = attention_output_39_perm_0, x = attention_output_37_cast_fp16)[name = string("transpose_117")];
+            tensor<fp16, [1, 64, 768]> input_117_cast_fp16 = reshape(shape = var_842, x = attention_output_39_cast_fp16)[name = string("input_117_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_58_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_117_cast_fp16)[name = string("linear_58_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_119_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_58_cast_fp16)[name = string("input_119_cast_fp16")];
+            tensor<int32, [1]> input_121_axes_0 = const()[name = string("input_121_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_121_cast_fp16 = layer_norm(axes = input_121_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_119_cast_fp16)[name = string("input_121_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_59_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_121_cast_fp16)[name = string("linear_59_cast_fp16")];
+            string input_125_mode_0 = const()[name = string("input_125_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_125_cast_fp16 = gelu(mode = input_125_mode_0, x = linear_59_cast_fp16)[name = string("input_125_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_60_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_125_cast_fp16)[name = string("linear_60_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_127_cast_fp16 = add(x = linear_60_cast_fp16, y = input_121_cast_fp16)[name = string("input_127_cast_fp16")];
+            tensor<int32, [1]> hidden_states_21_axes_0 = const()[name = string("hidden_states_21_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_21_cast_fp16 = layer_norm(axes = hidden_states_21_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_127_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_61_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_61_cast_fp16")];
+            tensor<int32, [4]> var_893 = const()[name = string("op_893"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_123_cast_fp16 = reshape(shape = var_893, x = linear_61_cast_fp16)[name = string("x_123_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_62_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_62_cast_fp16")];
+            tensor<int32, [4]> var_902 = const()[name = string("op_902"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_127_cast_fp16 = reshape(shape = var_902, x = linear_62_cast_fp16)[name = string("x_127_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_63_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_21_cast_fp16)[name = string("linear_63_cast_fp16")];
+            tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_131_cast_fp16 = reshape(shape = var_911, x = linear_63_cast_fp16)[name = string("x_131_cast_fp16")];
+            tensor<int32, [4]> transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = x_131_cast_fp16)[name = string("transpose_114")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_103 = transpose(perm = transpose_103_perm_0, x = x_127_cast_fp16)[name = string("transpose_115")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_102 = transpose(perm = transpose_102_perm_0, x = x_123_cast_fp16)[name = string("transpose_116")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_103, query = transpose_102, value = transpose_104)[name = string("attention_output_41_cast_fp16")];
+            tensor<int32, [4]> attention_output_43_perm_0 = const()[name = string("attention_output_43_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_917 = const()[name = string("op_917"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_43_cast_fp16 = transpose(perm = attention_output_43_perm_0, x = attention_output_41_cast_fp16)[name = string("transpose_113")];
+            tensor<fp16, [1, 64, 768]> input_129_cast_fp16 = reshape(shape = var_917, x = attention_output_43_cast_fp16)[name = string("input_129_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_64_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_129_cast_fp16)[name = string("linear_64_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_131_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = linear_64_cast_fp16)[name = string("input_131_cast_fp16")];
+            tensor<int32, [1]> input_133_axes_0 = const()[name = string("input_133_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_133_cast_fp16 = layer_norm(axes = input_133_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_131_cast_fp16)[name = string("input_133_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_65_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_133_cast_fp16)[name = string("linear_65_cast_fp16")];
+            string input_137_mode_0 = const()[name = string("input_137_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_137_cast_fp16 = gelu(mode = input_137_mode_0, x = linear_65_cast_fp16)[name = string("input_137_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_66_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_137_cast_fp16)[name = string("linear_66_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_139_cast_fp16 = add(x = linear_66_cast_fp16, y = input_133_cast_fp16)[name = string("input_139_cast_fp16")];
+            tensor<int32, [1]> hidden_states_axes_0 = const()[name = string("hidden_states_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> hidden_states_cast_fp16 = layer_norm(axes = hidden_states_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_139_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_67_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_query_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_67_cast_fp16")];
+            tensor<int32, [4]> var_968 = const()[name = string("op_968"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_135_cast_fp16 = reshape(shape = var_968, x = linear_67_cast_fp16)[name = string("x_135_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_68_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_key_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_68_cast_fp16")];
+            tensor<int32, [4]> var_977 = const()[name = string("op_977"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_139_cast_fp16 = reshape(shape = var_977, x = linear_68_cast_fp16)[name = string("x_139_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_69_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_value_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_69_cast_fp16")];
+            tensor<int32, [4]> var_986 = const()[name = string("op_986"), val = tensor<int32, [4]>([1, 64, 12, 64])];
+            tensor<fp16, [1, 64, 12, 64]> x_cast_fp16 = reshape(shape = var_986, x = linear_69_cast_fp16)[name = string("x_cast_fp16")];
+            tensor<int32, [4]> transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [4]> transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, [1, 12, 64, 64]> transpose_107 = transpose(perm = transpose_107_perm_0, x = x_cast_fp16)[name = string("transpose_110")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_106 = transpose(perm = transpose_106_perm_0, x = x_139_cast_fp16)[name = string("transpose_111")];
+            tensor<fp16, [1, 12, 64, 64]> transpose_105 = transpose(perm = transpose_105_perm_0, x = x_135_cast_fp16)[name = string("transpose_112")];
+            tensor<fp16, [1, 12, 64, 64]> attention_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_106, query = transpose_105, value = transpose_107)[name = string("attention_output_45_cast_fp16")];
+            tensor<int32, [4]> attention_output_perm_0 = const()[name = string("attention_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 64, 768])];
+            tensor<fp16, [1, 64, 12, 64]> attention_output_cast_fp16 = transpose(perm = attention_output_perm_0, x = attention_output_45_cast_fp16)[name = string("transpose_109")];
+            tensor<fp16, [1, 64, 768]> input_141_cast_fp16 = reshape(shape = var_992, x = attention_output_cast_fp16)[name = string("input_141_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_70_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_dense_weight_to_fp16, x = input_141_cast_fp16)[name = string("linear_70_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_143_cast_fp16 = add(x = hidden_states_cast_fp16, y = linear_70_cast_fp16)[name = string("input_143_cast_fp16")];
+            tensor<int32, [1]> input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_attention_LayerNorm_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")];
+            tensor<fp16, [1, 64, 2048]> linear_71_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_weight_to_fp16, x = input_145_cast_fp16)[name = string("linear_71_cast_fp16")];
+            string input_149_mode_0 = const()[name = string("input_149_mode_0"), val = string("TANH_APPROXIMATION")];
+            tensor<fp16, [1, 64, 2048]> input_149_cast_fp16 = gelu(mode = input_149_mode_0, x = linear_71_cast_fp16)[name = string("input_149_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> linear_72_cast_fp16 = linear(bias = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_bias_to_fp16, weight = bert_encoder_albert_layer_groups_0_albert_layers_0_ffn_output_weight_to_fp16, x = input_149_cast_fp16)[name = string("linear_72_cast_fp16")];
+            tensor<fp16, [1, 64, 768]> input_151_cast_fp16 = add(x = linear_72_cast_fp16, y = input_145_cast_fp16)[name = string("input_151_cast_fp16")];
+            tensor<int32, [1]> sequence_output_axes_0 = const()[name = string("sequence_output_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [1, 64, 768]> sequence_output = layer_norm(axes = sequence_output_axes_0, beta = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_bias_to_fp16, epsilon = var_118_to_fp16, gamma = bert_encoder_albert_layer_groups_0_albert_layers_0_full_layer_layer_norm_weight_to_fp16, x = input_151_cast_fp16)[name = string("sequence_output_cast_fp16")];
+            tensor<fp16, [512, 768]> bert_encoder_weight_to_fp16 = const()[name = string("bert_encoder_weight_to_fp16"), val = tensor<fp16, [512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11306496)))];
+            tensor<fp16, [512]> bert_encoder_bias_to_fp16 = const()[name = string("bert_encoder_bias_to_fp16"), val = tensor<fp16, [512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12092992)))];
+            tensor<fp16, [1, 64, 512]> linear_73_cast_fp16 = linear(bias = bert_encoder_bias_to_fp16, weight = bert_encoder_weight_to_fp16, x = sequence_output)[name = string("linear_73_cast_fp16")];
+            tensor<int32, [3]> var_1030_perm_0 = const()[name = string("op_1030_perm_0"), val = tensor<int32, [3]>([0, -1, -2])];
+            tensor<fp16, [1, 512, 64]> var_1030 = transpose(perm = var_1030_perm_0, x = linear_73_cast_fp16)[name = string("transpose_108")];
+        } -> (sequence_output, var_1030);
+}

iteration_3/compiled/bert_fp16_t64.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:629b75e73fbceeb89e92b0b85548bde59918208b424b8d6467202d72d82629b2
+size 12094080

iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc3f372a74ac4a7096282d6c8b15aca657091562cd9968823195b79b46634951
+size 243

iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c248985ef0758e1fdb436873593f6c37e0a645295dbdcda5bdc8f1c0b6d9efe8
+size 462

iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,110 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "var_6225",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 16,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 117,
+      "Ios18.matmul" : 48,
+      "Ios16.reduceMean" : 8,
+      "Split" : 72,
+      "Tile" : 16,
+      "Ios18.add" : 188,
+      "Ios16.reduceSum" : 8,
+      "Ios18.layerNorm" : 24,
+      "Ios18.reshape" : 102,
+      "Ios18.linear" : 143,
+      "Ios18.conv" : 8,
+      "Ios18.gelu" : 41,
+      "Ios18.sub" : 8,
+      "Ios18.concat" : 8,
+      "Stack" : 8,
+      "Ios18.transpose" : 216,
+      "Ios18.cast" : 4,
+      "Ios18.sliceByIndex" : 4
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "noise_init",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1, 256]",
+        "name" : "noises_aux",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 128 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 768]",
+        "name" : "embedding",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 256]",
+        "name" : "features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "fused_diffusion_sampler_fp16_t128",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

iteration_3/compiled/fused_diffusion_sampler_fp16_t128.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856

iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2455735a48a29c69ac89e1af3a8255d27c8900fc1a8a9818fa2b7482ba74ed20
+size 243

iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6a02ae96aa0d591c0718c7e108d017012d034e6511a5a9132b62d9120b1a4db
+size 462

iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,110 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "var_6225",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 16,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 117,
+      "Ios18.matmul" : 48,
+      "Ios16.reduceMean" : 8,
+      "Split" : 72,
+      "Tile" : 16,
+      "Ios18.add" : 188,
+      "Ios16.reduceSum" : 8,
+      "Ios18.layerNorm" : 24,
+      "Ios18.reshape" : 102,
+      "Ios18.linear" : 143,
+      "Ios18.conv" : 8,
+      "Ios18.gelu" : 41,
+      "Ios18.sub" : 8,
+      "Ios18.concat" : 8,
+      "Stack" : 8,
+      "Ios18.transpose" : 216,
+      "Ios18.cast" : 4,
+      "Ios18.sliceByIndex" : 4
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "noise_init",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1, 256]",
+        "name" : "noises_aux",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 256 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 256, 768]",
+        "name" : "embedding",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 256]",
+        "name" : "features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "fused_diffusion_sampler_fp16_t256",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

iteration_3/compiled/fused_diffusion_sampler_fp16_t256.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856

iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:554dd7f0ac5139918b06d99c98e202a58621a5783502a2c6585851ae475ca47e
+size 243

iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f8eb46b781de148bb4eb0c62576773d51dddbf26741f916f59d5ae2b8778795
+size 461

iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,110 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "var_6225",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 16,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 117,
+      "Ios18.matmul" : 48,
+      "Ios16.reduceMean" : 8,
+      "Split" : 72,
+      "Tile" : 16,
+      "Ios18.add" : 188,
+      "Ios16.reduceSum" : 8,
+      "Ios18.layerNorm" : 24,
+      "Ios18.reshape" : 102,
+      "Ios18.linear" : 143,
+      "Ios18.conv" : 8,
+      "Ios18.gelu" : 41,
+      "Ios18.sub" : 8,
+      "Ios18.concat" : 8,
+      "Stack" : 8,
+      "Ios18.transpose" : 216,
+      "Ios18.cast" : 4,
+      "Ios18.sliceByIndex" : 4
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-05-08",
+      "com.github.apple.coremltools.source" : "torch==2.11.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 256]",
+        "name" : "noise_init",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 4 × 1 × 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[4, 1, 1, 256]",
+        "name" : "noises_aux",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 64 × 768)",
+        "shortDescription" : "",
+        "shape" : "[1, 64, 768]",
+        "name" : "embedding",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 256)",
+        "shortDescription" : "",
+        "shape" : "[1, 256]",
+        "name" : "features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "fused_diffusion_sampler_fp16_t64",
+    "method" : "predict"
+  }
+]

iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

iteration_3/compiled/fused_diffusion_sampler_fp16_t64.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856

iteration_3/packages/.DS_Store CHANGED Viewed

Binary files a/iteration_3/packages/.DS_Store and b/iteration_3/packages/.DS_Store differ

iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86a52d9bd4b18a4a30aeb17118ca75cd57c729e45db4f4555f9b9745d33fa48a
+size 85782

iteration_3/packages/bert_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ff3ca8fac0332427ddfe5e78954382359d26516284113001a7484b60455eb10
+size 12126848

iteration_3/packages/bert_fp16_t128.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "8196AB77-8508-4F1F-8211-7AC9A128F5E9": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "CBF6095A-75FA-4003-8CB6-EB68D8C98F8F": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "CBF6095A-75FA-4003-8CB6-EB68D8C98F8F"
+}

iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96c9d6993382797abc86b9e8fae92f3af3a1cf0dca68c77eedbe5842f7f56706
+size 85782

iteration_3/packages/bert_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7569b53a68b9664e246fda171851daa1dd5f01f64aa31533dfcaf40f4034fee3
+size 12192384

iteration_3/packages/bert_fp16_t256.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "0DB32ECF-03B7-442F-883B-BAD01666A6EB": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "D3449315-4391-4FD3-A386-218E0755ACD4": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "0DB32ECF-03B7-442F-883B-BAD01666A6EB"
+}

iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ce83515fe05aeb2fab52063ca9a04aa700254ca393d0202e259ab3f60c0b99c
+size 85458

iteration_3/packages/bert_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:629b75e73fbceeb89e92b0b85548bde59918208b424b8d6467202d72d82629b2
+size 12094080

iteration_3/packages/bert_fp16_t64.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "7919D85D-6DA9-40DE-8836-373256963D46": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "C58FFFDE-2C63-420D-A908-5E1FD94373FB": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "C58FFFDE-2C63-420D-A908-5E1FD94373FB"
+}

iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec9d3ede7bc94cf7ea72a865f1997159b7916d7549e9639b08a50ecb0ccee874
+size 312430

iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856

iteration_3/packages/fused_diffusion_sampler_fp16_t128.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "0AD329AC-53BC-4290-912D-058CBA5BA5D7": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "9C5F32F4-A05E-4BF3-9782-D67784A8D9D5": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "0AD329AC-53BC-4290-912D-058CBA5BA5D7"
+}

iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab926a4ef86b086e32276632d7b6232c2b8b31566641a82a9cf5d51dfa92431f
+size 312430

iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856

iteration_3/packages/fused_diffusion_sampler_fp16_t256.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "09308FB4-D32A-41FF-B79D-FAA53069F9CC": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "76E947D9-F207-4B3D-93D4-708641940DCF": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "09308FB4-D32A-41FF-B79D-FAA53069F9CC"
+}

iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8aed8b7b3017488f499400131aa62942687ab060d5a2bf628e1fc9054b50569
+size 311547

iteration_3/packages/fused_diffusion_sampler_fp16_t64.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f823b5c638d2eb2fd91bf8e4efe4a90b2e1d3d9e2f5ab40e7e93cb03cd212aca
+size 49361856