remove individual files

Files changed (5) hide show

analytics/coremldata.bin +0 -3
coremldata.bin +0 -3
metadata.json +0 -91
model.mil +0 -204
weights/weight.bin +0 -3

analytics/coremldata.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a3dfa7b616cecfd9db758ddd58cc951de5f291b7e85da39107f342a59515163f
-size 241

coremldata.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e2155db28e8ab643d8179098242a8e33dc0634d3e005a6bd033ab81889472aa3
-size 459

metadata.json DELETED Viewed

@@ -1,91 +0,0 @@
-[
-  {
-    "shortDescription" : "A model that segments any text",
-    "metadataOutputVersion" : "3.0",
-    "outputSchema" : [
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Float16",
-        "formattedType" : "MultiArray (Float16 1 × 512 × 1)",
-        "shortDescription" : "",
-        "shape" : "[1, 512, 1]",
-        "name" : "output",
-        "type" : "MultiArray"
-      }
-    ],
-    "version" : "1",
-    "modelParameters" : [
-    ],
-    "author" : "Salesforce Inc",
-    "specificationVersion" : 9,
-    "storagePrecision" : "Mixed (Float16, Int8)",
-    "mlProgramOperationTypeHistogram" : {
-      "Ios18.linear" : 19,
-      "Ios18.notEqual" : 1,
-      "Ios18.scaledDotProductAttention" : 3,
-      "Ios18.expandDims" : 2,
-      "Select" : 3,
-      "Ios18.sub" : 1,
-      "Ios18.gelu" : 3,
-      "Ios18.gather" : 2,
-      "Ios16.cumsum" : 1,
-      "Ios18.add" : 11,
-      "Tile" : 1,
-      "Ios18.layerNorm" : 7,
-      "Ios18.cast" : 4,
-      "Ios18.transpose" : 12,
-      "Ios18.constexprBlockwiseShiftScale" : 21,
-      "Ios18.greaterEqual" : 2,
-      "Ios18.reshape" : 12,
-      "Ios18.mul" : 1
-    },
-    "computePrecision" : "Mixed (Float16, Int32, UInt16)",
-    "stateSchema" : [
-    ],
-    "isUpdatable" : "0",
-    "availability" : {
-      "macOS" : "15.0",
-      "tvOS" : "18.0",
-      "visionOS" : "2.0",
-      "watchOS" : "11.0",
-      "iOS" : "18.0",
-      "macCatalyst" : "18.0"
-    },
-    "modelType" : {
-      "name" : "MLModelType_mlProgram"
-    },
-    "inputSchema" : [
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Int32",
-        "formattedType" : "MultiArray (Int32 1 × 512)",
-        "shortDescription" : "",
-        "shape" : "[1, 512]",
-        "name" : "input_ids",
-        "type" : "MultiArray"
-      },
-      {
-        "hasShapeFlexibility" : "0",
-        "isOptional" : "0",
-        "dataType" : "Int32",
-        "formattedType" : "MultiArray (Int32 1 × 512)",
-        "shortDescription" : "",
-        "shape" : "[1, 512]",
-        "name" : "attention_mask",
-        "type" : "MultiArray"
-      }
-    ],
-    "userDefinedMetadata" : {
-      "com.github.apple.coremltools.conversion_date" : "2025-10-08",
-      "com.github.apple.coremltools.source" : "torch==2.7.1",
-      "com.github.apple.coremltools.version" : "9.0b1",
-      "com.github.apple.coremltools.source_dialect" : "TorchScript"
-    },
-    "generatedClassName" : "SaT",
-    "method" : "predict"
-  }
-]

model.mil DELETED Viewed

@@ -1,204 +0,0 @@
-program(1.3)
-[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
-{
-    func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
-            int32 var_25 = const()[name = string("op_25"), val = int32(1)];
-            tensor<bool, [1, 512]> var_41 = not_equal(x = input_ids, y = var_25)[name = string("op_41")];
-            string mask_dtype_0 = const()[name = string("mask_dtype_0"), val = string("int32")];
-            bool var_43_exclusive_0 = const()[name = string("op_43_exclusive_0"), val = bool(false)];
-            bool var_43_reverse_0 = const()[name = string("op_43_reverse_0"), val = bool(false)];
-            tensor<int32, [1, 512]> mask = cast(dtype = mask_dtype_0, x = var_41)[name = string("cast_3")];
-            tensor<int32, [1, 512]> var_43 = cumsum(axis = var_25, exclusive = var_43_exclusive_0, reverse = var_43_reverse_0, x = mask)[name = string("op_43")];
-            tensor<int32, [1, 512]> incremental_indices = mul(x = var_43, y = mask)[name = string("incremental_indices")];
-            int32 var_49 = const()[name = string("op_49"), val = int32(1)];
-            tensor<int32, [1, 512]> input_3 = add(x = incremental_indices, y = var_49)[name = string("input_3")];
-            int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
-            bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
-            tensor<fp16, [250002, 768]> roberta_embeddings_word_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp16, [250002, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192001664))))[name = string("roberta_embeddings_word_embeddings_weight_to_fp16_quantized")];
-            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
-            tensor<bool, [1, 512]> greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
-            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(250002)];
-            tensor<int32, [1, 512]> add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")];
-            tensor<int32, [1, 512]> select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
-            int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)];
-            tensor<bool, [1, 512]> greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")];
-            int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(250002)];
-            tensor<int32, [1, 512]> add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")];
-            tensor<int32, [1, 512]> select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")];
-            int32 inputs_embeds_cast_fp16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_axis_0"), val = int32(0)];
-            tensor<fp16, [1, 512, 768]> inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_cast_fp16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_1, validate_indices = inputs_embeds_validate_indices_0, x = roberta_embeddings_word_embeddings_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> token_type_embeddings_1_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204001856))), scale = tensor<fp16, [1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204395136))))[name = string("token_type_embeddings_1_to_fp16_quantized")];
-            tensor<fp16, [1, 512, 768]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = token_type_embeddings_1_to_fp16_quantized)[name = string("embeddings_1_cast_fp16")];
-            int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
-            int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
-            bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
-            tensor<fp16, [514, 768]> roberta_embeddings_position_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204395264))), scale = tensor<fp16, [514, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204790080))))[name = string("roberta_embeddings_position_embeddings_weight_to_fp16_quantized")];
-            string input_3_to_uint16_dtype_0 = const()[name = string("input_3_to_uint16_dtype_0"), val = string("uint16")];
-            tensor<uint16, [1, 512]> input_3_to_uint16 = cast(dtype = input_3_to_uint16_dtype_0, x = input_3)[name = string("cast_2")];
-            tensor<fp16, [1, 512, 768]> position_embeddings_1_cast_fp16_cast_uint16 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3_to_uint16, validate_indices = position_embeddings_1_validate_indices_0, x = roberta_embeddings_position_embeddings_weight_to_fp16_quantized)[name = string("position_embeddings_1_cast_fp16_cast_uint16")];
-            tensor<fp16, [1, 512, 768]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_cast_fp16_cast_uint16)[name = string("input_5_cast_fp16")];
-            tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("roberta_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204814848)))];
-            tensor<fp16, [768]> roberta_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("roberta_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204816448)))];
-            fp16 var_20_to_fp16 = const()[name = string("op_20_to_fp16"), val = fp16(0x1.5p-17)];
-            tensor<fp16, [1, 512, 768]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = roberta_embeddings_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
-            tensor<int32, [1]> var_67_axes_0 = const()[name = string("op_67_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<int32, [1, 1, 512]> var_67 = expand_dims(axes = var_67_axes_0, x = attention_mask)[name = string("op_67")];
-            tensor<int32, [1]> var_68_axes_0 = const()[name = string("op_68_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<int32, [1, 1, 1, 512]> var_68 = expand_dims(axes = var_68_axes_0, x = var_67)[name = string("op_68")];
-            tensor<int32, [4]> var_71_reps_0 = const()[name = string("op_71_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
-            tensor<int32, [1, 1, 512, 512]> var_71 = tile(reps = var_71_reps_0, x = var_68)[name = string("op_71")];
-            fp16 const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = fp16(0x1p+0)];
-            string expanded_mask_to_fp16_dtype_0 = const()[name = string("expanded_mask_to_fp16_dtype_0"), val = string("fp16")];
-            tensor<fp16, [1, 1, 512, 512]> var_71_to_fp16 = cast(dtype = expanded_mask_to_fp16_dtype_0, x = var_71)[name = string("cast_1")];
-            tensor<fp16, [1, 1, 512, 512]> inverted_mask_cast_fp16 = sub(x = const_4_to_fp16, y = var_71_to_fp16)[name = string("inverted_mask_cast_fp16")];
-            string var_76_dtype_0 = const()[name = string("op_76_dtype_0"), val = string("bool")];
-            fp16 var_9_to_fp16 = const()[name = string("op_9_to_fp16"), val = fp16(-inf)];
-            tensor<bool, [1, 1, 512, 512]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_76_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_0")];
-            tensor<fp16, [1, 1, 512, 512]> attention_mask_cast_fp16 = select(a = var_9_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204818048))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205407936))))[name = string("roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205444864)))];
-            tensor<fp16, [1, 512, 768]> linear_0_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
-            tensor<int32, [4]> var_97 = const()[name = string("op_97"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_98_cast_fp16 = reshape(shape = var_97, x = linear_0_cast_fp16)[name = string("op_98_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205446464))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206036352))))[name = string("roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206073280)))];
-            tensor<fp16, [1, 512, 768]> linear_1_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_1_cast_fp16")];
-            tensor<int32, [4]> var_103 = const()[name = string("op_103"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_104_cast_fp16 = reshape(shape = var_103, x = linear_1_cast_fp16)[name = string("op_104_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206074880))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206664768))))[name = string("roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206701696)))];
-            tensor<fp16, [1, 512, 768]> linear_2_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_2_cast_fp16")];
-            tensor<int32, [4]> var_109 = const()[name = string("op_109"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_110_cast_fp16 = reshape(shape = var_109, x = linear_2_cast_fp16)[name = string("op_110_cast_fp16")];
-            tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<fp16, [1, 12, 512, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = var_110_cast_fp16)[name = string("transpose_36")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_19 = transpose(perm = transpose_19_perm_0, x = var_104_cast_fp16)[name = string("transpose_37")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = var_98_cast_fp16)[name = string("transpose_38")];
-            tensor<fp16, [1, 12, 512, 64]> attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_19, query = transpose_18, value = transpose_20)[name = string("attn_output_1_cast_fp16")];
-            tensor<int32, [4]> attn_output_3_perm_0 = const()[name = string("attn_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [3]> var_114 = const()[name = string("op_114"), val = tensor<int32, [3]>([1, 512, 768])];
-            tensor<fp16, [1, 512, 12, 64]> attn_output_3_cast_fp16 = transpose(perm = attn_output_3_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_35")];
-            tensor<fp16, [1, 512, 768]> input_9_cast_fp16 = reshape(shape = var_114, x = attn_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206703296))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207293184))))[name = string("roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207330112)))];
-            tensor<fp16, [1, 512, 768]> linear_3_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_3_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_7_cast_fp16)[name = string("input_13_cast_fp16")];
-            tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207331712)))];
-            tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207333312)))];
-            tensor<fp16, [1, 512, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")];
-            tensor<fp16, [3072, 768]> roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207334912))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209694272))))[name = string("roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [3072]> roberta_encoder_layer_0_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209841792)))];
-            tensor<fp16, [1, 512, 3072]> linear_4_cast_fp16 = linear(bias = roberta_encoder_layer_0_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_4_cast_fp16")];
-            string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 512, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = string("input_19_cast_fp16")];
-            tensor<fp16, [768, 3072]> roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209848000))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212207360))))[name = string("roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_0_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212354880)))];
-            tensor<fp16, [1, 512, 768]> linear_5_cast_fp16 = linear(bias = roberta_encoder_layer_0_output_dense_bias_to_fp16, weight = roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_5_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = string("input_23_cast_fp16")];
-            tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212356480)))];
-            tensor<fp16, [768]> roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212358080)))];
-            tensor<fp16, [1, 512, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212359680))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212949568))))[name = string("roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212986496)))];
-            tensor<fp16, [1, 512, 768]> linear_6_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_6_cast_fp16")];
-            tensor<int32, [4]> var_156 = const()[name = string("op_156"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_157_cast_fp16 = reshape(shape = var_156, x = linear_6_cast_fp16)[name = string("op_157_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212988096))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213577984))))[name = string("roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213614912)))];
-            tensor<fp16, [1, 512, 768]> linear_7_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_7_cast_fp16")];
-            tensor<int32, [4]> var_162 = const()[name = string("op_162"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_163_cast_fp16 = reshape(shape = var_162, x = linear_7_cast_fp16)[name = string("op_163_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213616512))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214206400))))[name = string("roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214243328)))];
-            tensor<fp16, [1, 512, 768]> linear_8_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_8_cast_fp16")];
-            tensor<int32, [4]> var_168 = const()[name = string("op_168"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_8_cast_fp16)[name = string("op_169_cast_fp16")];
-            tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<fp16, [1, 12, 512, 64]> transpose_23 = transpose(perm = transpose_23_perm_0, x = var_169_cast_fp16)[name = string("transpose_32")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = var_163_cast_fp16)[name = string("transpose_33")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_21 = transpose(perm = transpose_21_perm_0, x = var_157_cast_fp16)[name = string("transpose_34")];
-            tensor<fp16, [1, 12, 512, 64]> attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_22, query = transpose_21, value = transpose_23)[name = string("attn_output_5_cast_fp16")];
-            tensor<int32, [4]> attn_output_7_perm_0 = const()[name = string("attn_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [3]> var_173 = const()[name = string("op_173"), val = tensor<int32, [3]>([1, 512, 768])];
-            tensor<fp16, [1, 512, 12, 64]> attn_output_7_cast_fp16 = transpose(perm = attn_output_7_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_31")];
-            tensor<fp16, [1, 512, 768]> input_25_cast_fp16 = reshape(shape = var_173, x = attn_output_7_cast_fp16)[name = string("input_25_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214244928))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214834816))))[name = string("roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214871744)))];
-            tensor<fp16, [1, 512, 768]> linear_9_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_9_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_29_cast_fp16 = add(x = linear_9_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("input_29_cast_fp16")];
-            tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214873344)))];
-            tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214874944)))];
-            tensor<fp16, [1, 512, 768]> input_31_cast_fp16 = layer_norm(axes = input_31_axes_0, beta = roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")];
-            tensor<fp16, [3072, 768]> roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214876544))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217235904))))[name = string("roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [3072]> roberta_encoder_layer_1_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217383424)))];
-            tensor<fp16, [1, 512, 3072]> linear_10_cast_fp16 = linear(bias = roberta_encoder_layer_1_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_10_cast_fp16")];
-            string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 512, 3072]> input_35_cast_fp16 = gelu(mode = input_35_mode_0, x = linear_10_cast_fp16)[name = string("input_35_cast_fp16")];
-            tensor<fp16, [768, 3072]> roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217389632))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219748992))))[name = string("roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_1_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219896512)))];
-            tensor<fp16, [1, 512, 768]> linear_11_cast_fp16 = linear(bias = roberta_encoder_layer_1_output_dense_bias_to_fp16, weight = roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_11_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_39_cast_fp16 = add(x = linear_11_cast_fp16, y = input_31_cast_fp16)[name = string("input_39_cast_fp16")];
-            tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219898112)))];
-            tensor<fp16, [768]> roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219899712)))];
-            tensor<fp16, [1, 512, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16, x = input_39_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219901312))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220491200))))[name = string("roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220528128)))];
-            tensor<fp16, [1, 512, 768]> linear_12_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_12_cast_fp16")];
-            tensor<int32, [4]> var_215 = const()[name = string("op_215"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_216_cast_fp16 = reshape(shape = var_215, x = linear_12_cast_fp16)[name = string("op_216_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220529728))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221119616))))[name = string("roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221156544)))];
-            tensor<fp16, [1, 512, 768]> linear_13_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_13_cast_fp16")];
-            tensor<int32, [4]> var_221 = const()[name = string("op_221"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_222_cast_fp16 = reshape(shape = var_221, x = linear_13_cast_fp16)[name = string("op_222_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221158144))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221748032))))[name = string("roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221784960)))];
-            tensor<fp16, [1, 512, 768]> linear_14_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_14_cast_fp16")];
-            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, -1, 12, 64])];
-            tensor<fp16, [1, 512, 12, 64]> var_228_cast_fp16 = reshape(shape = var_227, x = linear_14_cast_fp16)[name = string("op_228_cast_fp16")];
-            tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [4]> transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<fp16, [1, 12, 512, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = var_228_cast_fp16)[name = string("transpose_28")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_25 = transpose(perm = transpose_25_perm_0, x = var_222_cast_fp16)[name = string("transpose_29")];
-            tensor<fp16, [1, 12, 512, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = var_216_cast_fp16)[name = string("transpose_30")];
-            tensor<fp16, [1, 12, 512, 64]> attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_25, query = transpose_24, value = transpose_26)[name = string("attn_output_9_cast_fp16")];
-            tensor<int32, [4]> attn_output_perm_0 = const()[name = string("attn_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
-            tensor<int32, [3]> var_232 = const()[name = string("op_232"), val = tensor<int32, [3]>([1, 512, 768])];
-            tensor<fp16, [1, 512, 12, 64]> attn_output_cast_fp16 = transpose(perm = attn_output_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_27")];
-            tensor<fp16, [1, 512, 768]> input_41_cast_fp16 = reshape(shape = var_232, x = attn_output_cast_fp16)[name = string("input_41_cast_fp16")];
-            tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221786560))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376448))))[name = string("roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222413376)))];
-            tensor<fp16, [1, 512, 768]> linear_15_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_15_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_45_cast_fp16 = add(x = linear_15_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("input_45_cast_fp16")];
-            tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222414976)))];
-            tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222416576)))];
-            tensor<fp16, [1, 512, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
-            tensor<fp16, [3072, 768]> roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222418176))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224777536))))[name = string("roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [3072]> roberta_encoder_layer_2_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224925056)))];
-            tensor<fp16, [1, 512, 3072]> linear_16_cast_fp16 = linear(bias = roberta_encoder_layer_2_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_16_cast_fp16")];
-            string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
-            tensor<fp16, [1, 512, 3072]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = linear_16_cast_fp16)[name = string("input_51_cast_fp16")];
-            tensor<fp16, [768, 3072]> roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224931264))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227290624))))[name = string("roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized")];
-            tensor<fp16, [768]> roberta_encoder_layer_2_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227438144)))];
-            tensor<fp16, [1, 512, 768]> linear_17_cast_fp16 = linear(bias = roberta_encoder_layer_2_output_dense_bias_to_fp16, weight = roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_17_cast_fp16")];
-            tensor<fp16, [1, 512, 768]> input_55_cast_fp16 = add(x = linear_17_cast_fp16, y = input_47_cast_fp16)[name = string("input_55_cast_fp16")];
-            tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
-            tensor<fp16, [768]> roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227439744)))];
-            tensor<fp16, [768]> roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227441344)))];
-            tensor<fp16, [1, 512, 768]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("input_57_cast_fp16")];
-            tensor<fp16, [1, 768]> classifier_weight_to_fp16 = const()[name = string("classifier_weight_to_fp16"), val = tensor<fp16, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227442944)))];
-            tensor<fp16, [1]> classifier_bias_to_fp16 = const()[name = string("classifier_bias_to_fp16"), val = tensor<fp16, [1]>([0x1.678p-12])];
-            tensor<fp16, [1, 512, 1]> output = linear(bias = classifier_bias_to_fp16, weight = classifier_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_18_cast_fp16")];
-        } -> (output);
-}

weights/weight.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b0395a363b84b8e638bc43ad40464023376e8efa4efdcae8389a1bffcf6b0c50
-size 227444544