quantized model

Files changed (5) hide show

SaT.mlmodelc/analytics/coremldata.bin +1 -1
SaT.mlmodelc/coremldata.bin +1 -1
SaT.mlmodelc/metadata.json +5 -4
SaT.mlmodelc/model.mil +78 -78
SaT.mlmodelc/weights/weight.bin +2 -2

SaT.mlmodelc/analytics/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3c7ca81d83547e93951dc7173fd9cab1828134887dca4ff988bbcd1f0eaccf5
 size 241

 version https://git-lfs.github.com/spec/v1
+oid sha256:f64f977399bfea6387639b0aa47c328e1ccb5c72ec192fafebe6a6a037482aa1
 size 241

SaT.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bcd0b3ee1481feafa7931ba0eb87d25bff4c4370ed2fbe74c8a44b3e05dc505
 size 347

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f7420b0c2ff587bdc15eb8b95e6adbc1d3ef598a76984e5cf6635be9344da29
 size 347

SaT.mlmodelc/metadata.json CHANGED Viewed

@@ -1,7 +1,7 @@
 [
   {
     "metadataOutputVersion" : "3.0",
-    "storagePrecision" : "Float32",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
@@ -29,9 +29,10 @@
       "Ios16.cumsum" : 1,
       "Ios18.add" : 9,
       "Ios18.layerNorm" : 7,
-      "Ios18.transpose" : 12,
       "Ios18.cast" : 2,
       "Ios18.reshape" : 12,
       "Ios18.mul" : 2
     },
     "computePrecision" : "Mixed (Float32, Int32)",
@@ -51,9 +52,9 @@
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
-      "com.github.apple.coremltools.version" : "8.3.0",
       "com.github.apple.coremltools.source_dialect" : "TorchScript",
-      "com.github.apple.coremltools.source" : "torch==2.9.0"
     },
     "inputSchema" : [
       {

 [
   {
     "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float32, Int8)",
     "outputSchema" : [
       {
         "hasShapeFlexibility" : "0",
       "Ios16.cumsum" : 1,
       "Ios18.add" : 9,
       "Ios18.layerNorm" : 7,
       "Ios18.cast" : 2,
+      "Ios18.transpose" : 12,
       "Ios18.reshape" : 12,
+      "Ios18.constexprBlockwiseShiftScale" : 21,
       "Ios18.mul" : 2
     },
     "computePrecision" : "Mixed (Float32, Int32)",
       "name" : "MLModelType_mlProgram"
     },
     "userDefinedMetadata" : {
       "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.9.0",
+      "com.github.apple.coremltools.version" : "8.3.0"
     },
     "inputSchema" : [
       {

SaT.mlmodelc/model.mil CHANGED Viewed

@@ -1,61 +1,61 @@
 program(1.3)
-[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
 {
     func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
-            tensor<fp32, [250002, 768]> base_model_roberta_embeddings_word_embeddings_weight = const()[name = string("base_model_roberta_embeddings_word_embeddings_weight"), val = tensor<fp32, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
-            tensor<fp32, [514, 768]> base_model_roberta_embeddings_position_embeddings_weight = const()[name = string("base_model_roberta_embeddings_position_embeddings_weight"), val = tensor<fp32, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768006272)))];
-            tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769585344)))];
-            tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769588480)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769591616)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769594752)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(771954112)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(771957248)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774316608)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774319744)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776679104)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776682240)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779041600)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779044736)))];
-            tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779047872)))];
-            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_0_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779060224)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788497472)))];
-            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_0_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788500608)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797937856)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797940992)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797944128)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797947264)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800306624)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800309760)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802669120)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802672256)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805031616)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805034752)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807394112)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807397248)))];
-            tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807400384)))];
-            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_1_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807412736)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816849984)))];
-            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_1_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816853120)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826290368)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826293504)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826296640)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826299776)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828659136)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828662272)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831021632)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831024768)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833384128)))];
-            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833387264)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835746624)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835749760)))];
-            tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835752896)))];
-            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_2_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835765248)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845202496)))];
-            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_2_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845205632)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854642880)))];
-            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854646016)))];
             tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
-            tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854649152)))];
             int32 var_15 = const()[name = string("op_15"), val = int32(1)];
             fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
             fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
@@ -64,7 +64,7 @@ program(1.3)
             tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
             tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
             string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
-            tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("cast_22")];
             tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
             fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
             tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
@@ -72,7 +72,7 @@ program(1.3)
             string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
             bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
             bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
-            tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("cast_21")];
             tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
             tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
             int32 var_65 = const()[name = string("op_65"), val = int32(1)];
@@ -80,23 +80,23 @@ program(1.3)
             int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
             int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
             bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
-            tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = base_model_roberta_embeddings_word_embeddings_weight)[name = string("inputs_embeds")];
-            tensor<fp32, [1, 512, 768]> token_type_embeddings_1 = const()[name = string("token_type_embeddings_1"), val = tensor<fp32, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854652288)))];
-            tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1)[name = string("embeddings_1")];
             int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
             int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
             bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
-            tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x = base_model_roberta_embeddings_position_embeddings_weight)[name = string("position_embeddings_1")];
             tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
             tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
-            tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight = base_model_roberta_encoder_layer_0_attention_self_query_weight, x = input_7)[name = string("linear_0")];
             tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
-            tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight = base_model_roberta_encoder_layer_0_attention_self_key_weight, x = input_7)[name = string("linear_1")];
             tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
-            tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight = base_model_roberta_encoder_layer_0_attention_self_value_weight, x = input_7)[name = string("linear_2")];
             tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
             tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -110,24 +110,24 @@ program(1.3)
             tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
             tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
-            tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_0_attention_output_dense_weight, x = input_9)[name = string("linear_3")];
             tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
             tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
-            tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_0_intermediate_dense_weight, x = input_15)[name = string("linear_4")];
             string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
-            tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight = base_model_roberta_encoder_layer_0_output_dense_weight, x = input_19)[name = string("linear_5")];
             tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
             tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
-            tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight = base_model_roberta_encoder_layer_1_attention_self_query_weight, x = hidden_states_7)[name = string("linear_6")];
             tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
-            tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight = base_model_roberta_encoder_layer_1_attention_self_key_weight, x = hidden_states_7)[name = string("linear_7")];
             tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
-            tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight = base_model_roberta_encoder_layer_1_attention_self_value_weight, x = hidden_states_7)[name = string("linear_8")];
             tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
             tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -141,24 +141,24 @@ program(1.3)
             tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
             tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
-            tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_1_attention_output_dense_weight, x = input_25)[name = string("linear_9")];
             tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
             tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
-            tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_1_intermediate_dense_weight, x = input_31)[name = string("linear_10")];
             string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
-            tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight = base_model_roberta_encoder_layer_1_output_dense_weight, x = input_35)[name = string("linear_11")];
             tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
             tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
-            tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight = base_model_roberta_encoder_layer_2_attention_self_query_weight, x = hidden_states_13)[name = string("linear_12")];
             tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
-            tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight = base_model_roberta_encoder_layer_2_attention_self_key_weight, x = hidden_states_13)[name = string("linear_13")];
             tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
-            tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight = base_model_roberta_encoder_layer_2_attention_self_value_weight, x = hidden_states_13)[name = string("linear_14")];
             tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
             tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -172,14 +172,14 @@ program(1.3)
             tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
             tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
-            tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_2_attention_output_dense_weight, x = input_41)[name = string("linear_15")];
             tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
             tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
-            tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_2_intermediate_dense_weight, x = input_47)[name = string("linear_16")];
             string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
-            tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight = base_model_roberta_encoder_layer_2_output_dense_weight, x = input_51)[name = string("linear_17")];
             tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
             tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];

 program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
 {
     func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
+            tensor<fp32, [250002, 768]> base_model_roberta_embeddings_word_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp32, [250002, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192001664))))[name = string("base_model_roberta_embeddings_word_embeddings_weight_quantized")];
+            tensor<fp32, [514, 768]> base_model_roberta_embeddings_position_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216001920))), scale = tensor<fp32, [514, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216396736))))[name = string("base_model_roberta_embeddings_position_embeddings_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216446144)))];
+            tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216449280)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216452416)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216455552))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217045440))))[name = string("base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217119232)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217122368))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217712256))))[name = string("base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217786048)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217789184))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218379072))))[name = string("base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218452864)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218456000))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219045888))))[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219119680)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219122816)))];
+            tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219125952)))];
+            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219138304))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221497664))))[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221792640)))];
+            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_0_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221795776))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224155136))))[name = string("base_model_roberta_encoder_layer_0_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224450112)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224453248)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224456384)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224459520))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225049408))))[name = string("base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225123200)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225126336))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225716224))))[name = string("base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225790016)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225793152))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226383040))))[name = string("base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226456832)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226459968))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227049856))))[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227123648)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227126784)))];
+            tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227129920)))];
+            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227142272))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229501632))))[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229796608)))];
+            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_1_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229799744))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232159104))))[name = string("base_model_roberta_encoder_layer_1_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232454080)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232457216)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232460352)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232463488))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233053376))))[name = string("base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233127168)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233130304))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233720192))))[name = string("base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233793984)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233797120))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234387008))))[name = string("base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234460800)))];
+            tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234463936))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235053824))))[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235127616)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235130752)))];
+            tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235133888)))];
+            tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235146240))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237505600))))[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237800576)))];
+            tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_2_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237803712))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240163072))))[name = string("base_model_roberta_encoder_layer_2_output_dense_weight_quantized")];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240458048)))];
+            tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240461184)))];
             tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
+            tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240464320)))];
             int32 var_15 = const()[name = string("op_15"), val = int32(1)];
             fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
             fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
             tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
             tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
             string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
+            tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("cast_1")];
             tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
             fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
             tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
             string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
             bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
             bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
+            tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("cast_0")];
             tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
             tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
             int32 var_65 = const()[name = string("op_65"), val = int32(1)];
             int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
             int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
             bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
+            tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = base_model_roberta_embeddings_word_embeddings_weight_quantized)[name = string("inputs_embeds")];
+            tensor<fp32, [1, 512, 768]> token_type_embeddings_1_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240467456))), scale = tensor<fp32, [1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240860736))))[name = string("token_type_embeddings_1_quantized")];
+            tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1_quantized)[name = string("embeddings_1")];
             int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
             int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
             bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
+            tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x = base_model_roberta_embeddings_position_embeddings_weight_quantized)[name = string("position_embeddings_1")];
             tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
             tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
+            tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight = base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized, x = input_7)[name = string("linear_0")];
             tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
+            tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight = base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized, x = input_7)[name = string("linear_1")];
             tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
+            tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight = base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized, x = input_7)[name = string("linear_2")];
             tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
             tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
             tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
             tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
+            tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized, x = input_9)[name = string("linear_3")];
             tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
             tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
+            tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized, x = input_15)[name = string("linear_4")];
             string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
+            tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight = base_model_roberta_encoder_layer_0_output_dense_weight_quantized, x = input_19)[name = string("linear_5")];
             tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
             tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
+            tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight = base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized, x = hidden_states_7)[name = string("linear_6")];
             tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
+            tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight = base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized, x = hidden_states_7)[name = string("linear_7")];
             tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
+            tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight = base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized, x = hidden_states_7)[name = string("linear_8")];
             tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
             tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
             tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
             tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
+            tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized, x = input_25)[name = string("linear_9")];
             tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
             tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
+            tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized, x = input_31)[name = string("linear_10")];
             string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
+            tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight = base_model_roberta_encoder_layer_1_output_dense_weight_quantized, x = input_35)[name = string("linear_11")];
             tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
             tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
+            tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight = base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized, x = hidden_states_13)[name = string("linear_12")];
             tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
+            tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight = base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized, x = hidden_states_13)[name = string("linear_13")];
             tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
+            tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight = base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized, x = hidden_states_13)[name = string("linear_14")];
             tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
             tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
             tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
             tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
             tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
             tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
+            tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized, x = input_41)[name = string("linear_15")];
             tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
             tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
+            tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized, x = input_47)[name = string("linear_16")];
             string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
             tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
+            tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight = base_model_roberta_encoder_layer_2_output_dense_weight_quantized, x = input_51)[name = string("linear_17")];
             tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
             tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
             tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];

SaT.mlmodelc/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7690c08745205045ab57378cffcf8933e8b10f7471517f6ab7d43ae21ab9ea5a
-size 856225216

 version https://git-lfs.github.com/spec/v1
+oid sha256:08b5d724996325ea649fad4b140360e29a08a1eb6ecdb4329455a55bd6973c59
+size 240860864