Sachin Desai committed on
Commit
87f6e26
·
1 Parent(s): c4a5e6d

quantized model

Browse files
SaT.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c7ca81d83547e93951dc7173fd9cab1828134887dca4ff988bbcd1f0eaccf5
3
  size 241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f64f977399bfea6387639b0aa47c328e1ccb5c72ec192fafebe6a6a037482aa1
3
  size 241
SaT.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bcd0b3ee1481feafa7931ba0eb87d25bff4c4370ed2fbe74c8a44b3e05dc505
3
  size 347
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f7420b0c2ff587bdc15eb8b95e6adbc1d3ef598a76984e5cf6635be9344da29
3
  size 347
SaT.mlmodelc/metadata.json CHANGED
@@ -1,7 +1,7 @@
1
  [
2
  {
3
  "metadataOutputVersion" : "3.0",
4
- "storagePrecision" : "Float32",
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
@@ -29,9 +29,10 @@
29
  "Ios16.cumsum" : 1,
30
  "Ios18.add" : 9,
31
  "Ios18.layerNorm" : 7,
32
- "Ios18.transpose" : 12,
33
  "Ios18.cast" : 2,
 
34
  "Ios18.reshape" : 12,
 
35
  "Ios18.mul" : 2
36
  },
37
  "computePrecision" : "Mixed (Float32, Int32)",
@@ -51,9 +52,9 @@
51
  "name" : "MLModelType_mlProgram"
52
  },
53
  "userDefinedMetadata" : {
54
- "com.github.apple.coremltools.version" : "8.3.0",
55
  "com.github.apple.coremltools.source_dialect" : "TorchScript",
56
- "com.github.apple.coremltools.source" : "torch==2.9.0"
 
57
  },
58
  "inputSchema" : [
59
  {
 
1
  [
2
  {
3
  "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Mixed (Float32, Int8)",
5
  "outputSchema" : [
6
  {
7
  "hasShapeFlexibility" : "0",
 
29
  "Ios16.cumsum" : 1,
30
  "Ios18.add" : 9,
31
  "Ios18.layerNorm" : 7,
 
32
  "Ios18.cast" : 2,
33
+ "Ios18.transpose" : 12,
34
  "Ios18.reshape" : 12,
35
+ "Ios18.constexprBlockwiseShiftScale" : 21,
36
  "Ios18.mul" : 2
37
  },
38
  "computePrecision" : "Mixed (Float32, Int32)",
 
52
  "name" : "MLModelType_mlProgram"
53
  },
54
  "userDefinedMetadata" : {
 
55
  "com.github.apple.coremltools.source_dialect" : "TorchScript",
56
+ "com.github.apple.coremltools.source" : "torch==2.9.0",
57
+ "com.github.apple.coremltools.version" : "8.3.0"
58
  },
59
  "inputSchema" : [
60
  {
SaT.mlmodelc/model.mil CHANGED
@@ -1,61 +1,61 @@
1
  program(1.3)
2
- [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.9.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
  {
4
  func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
5
- tensor<fp32, [250002, 768]> base_model_roberta_embeddings_word_embeddings_weight = const()[name = string("base_model_roberta_embeddings_word_embeddings_weight"), val = tensor<fp32, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
6
- tensor<fp32, [514, 768]> base_model_roberta_embeddings_position_embeddings_weight = const()[name = string("base_model_roberta_embeddings_position_embeddings_weight"), val = tensor<fp32, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(768006272)))];
7
- tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769585344)))];
8
- tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769588480)))];
9
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769591616)))];
10
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(769594752)))];
11
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(771954112)))];
12
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(771957248)))];
13
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774316608)))];
14
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774319744)))];
15
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776679104)))];
16
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776682240)))];
17
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779041600)))];
18
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779044736)))];
19
- tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779047872)))];
20
- tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_0_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(779060224)))];
21
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788497472)))];
22
- tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_0_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(788500608)))];
23
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797937856)))];
24
- tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797940992)))];
25
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797944128)))];
26
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(797947264)))];
27
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800306624)))];
28
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(800309760)))];
29
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802669120)))];
30
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(802672256)))];
31
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805031616)))];
32
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(805034752)))];
33
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807394112)))];
34
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807397248)))];
35
- tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807400384)))];
36
- tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_1_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(807412736)))];
37
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816849984)))];
38
- tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_1_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(816853120)))];
39
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826290368)))];
40
- tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826293504)))];
41
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826296640)))];
42
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_query_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(826299776)))];
43
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828659136)))];
44
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_key_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(828662272)))];
45
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831021632)))];
46
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_value_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(831024768)))];
47
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833384128)))];
48
- tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_weight"), val = tensor<fp32, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833387264)))];
49
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835746624)))];
50
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835749760)))];
51
- tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835752896)))];
52
- tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_2_intermediate_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_weight"), val = tensor<fp32, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(835765248)))];
53
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845202496)))];
54
- tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_2_output_dense_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_weight"), val = tensor<fp32, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(845205632)))];
55
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854642880)))];
56
- tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854646016)))];
57
  tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
58
- tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854649152)))];
59
  int32 var_15 = const()[name = string("op_15"), val = int32(1)];
60
  fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
61
  fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
@@ -64,7 +64,7 @@ program(1.3)
64
  tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
65
  tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
66
  string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
67
- tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("cast_22")];
68
  tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
69
  fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
70
  tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
@@ -72,7 +72,7 @@ program(1.3)
72
  string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
73
  bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
74
  bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
75
- tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("cast_21")];
76
  tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
77
  tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
78
  int32 var_65 = const()[name = string("op_65"), val = int32(1)];
@@ -80,23 +80,23 @@ program(1.3)
80
  int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
81
  int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
82
  bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
83
- tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = base_model_roberta_embeddings_word_embeddings_weight)[name = string("inputs_embeds")];
84
- tensor<fp32, [1, 512, 768]> token_type_embeddings_1 = const()[name = string("token_type_embeddings_1"), val = tensor<fp32, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854652288)))];
85
- tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1)[name = string("embeddings_1")];
86
  int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
87
  int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
88
  bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
89
- tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x = base_model_roberta_embeddings_position_embeddings_weight)[name = string("position_embeddings_1")];
90
  tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
91
  tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
92
  tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
93
- tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight = base_model_roberta_encoder_layer_0_attention_self_query_weight, x = input_7)[name = string("linear_0")];
94
  tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
95
  tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
96
- tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight = base_model_roberta_encoder_layer_0_attention_self_key_weight, x = input_7)[name = string("linear_1")];
97
  tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
98
  tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
99
- tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight = base_model_roberta_encoder_layer_0_attention_self_value_weight, x = input_7)[name = string("linear_2")];
100
  tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
101
  tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
102
  tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -110,24 +110,24 @@ program(1.3)
110
  tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
111
  tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
112
  tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
113
- tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_0_attention_output_dense_weight, x = input_9)[name = string("linear_3")];
114
  tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
115
  tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
116
  tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
117
- tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_0_intermediate_dense_weight, x = input_15)[name = string("linear_4")];
118
  string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
119
  tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
120
- tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight = base_model_roberta_encoder_layer_0_output_dense_weight, x = input_19)[name = string("linear_5")];
121
  tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
122
  tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
123
  tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
124
- tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight = base_model_roberta_encoder_layer_1_attention_self_query_weight, x = hidden_states_7)[name = string("linear_6")];
125
  tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
126
  tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
127
- tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight = base_model_roberta_encoder_layer_1_attention_self_key_weight, x = hidden_states_7)[name = string("linear_7")];
128
  tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
129
  tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
130
- tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight = base_model_roberta_encoder_layer_1_attention_self_value_weight, x = hidden_states_7)[name = string("linear_8")];
131
  tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
132
  tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
133
  tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -141,24 +141,24 @@ program(1.3)
141
  tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
142
  tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
143
  tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
144
- tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_1_attention_output_dense_weight, x = input_25)[name = string("linear_9")];
145
  tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
146
  tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
147
  tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
148
- tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_1_intermediate_dense_weight, x = input_31)[name = string("linear_10")];
149
  string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
150
  tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
151
- tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight = base_model_roberta_encoder_layer_1_output_dense_weight, x = input_35)[name = string("linear_11")];
152
  tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
153
  tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
154
  tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
155
- tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight = base_model_roberta_encoder_layer_2_attention_self_query_weight, x = hidden_states_13)[name = string("linear_12")];
156
  tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
157
  tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
158
- tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight = base_model_roberta_encoder_layer_2_attention_self_key_weight, x = hidden_states_13)[name = string("linear_13")];
159
  tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
160
  tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
161
- tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight = base_model_roberta_encoder_layer_2_attention_self_value_weight, x = hidden_states_13)[name = string("linear_14")];
162
  tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
163
  tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
164
  tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
@@ -172,14 +172,14 @@ program(1.3)
172
  tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
173
  tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
174
  tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
175
- tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_2_attention_output_dense_weight, x = input_41)[name = string("linear_15")];
176
  tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
177
  tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
178
  tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
179
- tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_2_intermediate_dense_weight, x = input_47)[name = string("linear_16")];
180
  string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
181
  tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
182
- tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight = base_model_roberta_encoder_layer_2_output_dense_weight, x = input_51)[name = string("linear_17")];
183
  tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
184
  tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
185
  tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];
 
1
  program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
3
  {
4
  func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
5
+ tensor<fp32, [250002, 768]> base_model_roberta_embeddings_word_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp32, [250002, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192001664))))[name = string("base_model_roberta_embeddings_word_embeddings_weight_quantized")];
6
+ tensor<fp32, [514, 768]> base_model_roberta_embeddings_position_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216001920))), scale = tensor<fp32, [514, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216396736))))[name = string("base_model_roberta_embeddings_position_embeddings_weight_quantized")];
7
+ tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216446144)))];
8
+ tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216449280)))];
9
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216452416)))];
10
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216455552))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217045440))))[name = string("base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized")];
11
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217119232)))];
12
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217122368))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217712256))))[name = string("base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized")];
13
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217786048)))];
14
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217789184))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218379072))))[name = string("base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized")];
15
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218452864)))];
16
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218456000))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219045888))))[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized")];
17
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219119680)))];
18
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219122816)))];
19
+ tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219125952)))];
20
+ tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219138304))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221497664))))[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized")];
21
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221792640)))];
22
+ tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_0_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221795776))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224155136))))[name = string("base_model_roberta_encoder_layer_0_output_dense_weight_quantized")];
23
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224450112)))];
24
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224453248)))];
25
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224456384)))];
26
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224459520))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225049408))))[name = string("base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized")];
27
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225123200)))];
28
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225126336))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225716224))))[name = string("base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized")];
29
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225790016)))];
30
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225793152))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226383040))))[name = string("base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized")];
31
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226456832)))];
32
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226459968))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227049856))))[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized")];
33
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227123648)))];
34
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227126784)))];
35
+ tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227129920)))];
36
+ tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227142272))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229501632))))[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized")];
37
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229796608)))];
38
+ tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_1_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229799744))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232159104))))[name = string("base_model_roberta_encoder_layer_1_output_dense_weight_quantized")];
39
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232454080)))];
40
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232457216)))];
41
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232460352)))];
42
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232463488))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233053376))))[name = string("base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized")];
43
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233127168)))];
44
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233130304))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233720192))))[name = string("base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized")];
45
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233793984)))];
46
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233797120))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234387008))))[name = string("base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized")];
47
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234460800)))];
48
+ tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234463936))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235053824))))[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized")];
49
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235127616)))];
50
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235130752)))];
51
+ tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235133888)))];
52
+ tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235146240))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237505600))))[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized")];
53
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237800576)))];
54
+ tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_2_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237803712))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240163072))))[name = string("base_model_roberta_encoder_layer_2_output_dense_weight_quantized")];
55
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240458048)))];
56
+ tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240461184)))];
57
  tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
58
+ tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240464320)))];
59
  int32 var_15 = const()[name = string("op_15"), val = int32(1)];
60
  fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
61
  fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
 
64
  tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
65
  tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
66
  string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
67
+ tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("cast_1")];
68
  tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
69
  fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
70
  tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
 
72
  string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
73
  bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
74
  bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
75
+ tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("cast_0")];
76
  tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
77
  tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
78
  int32 var_65 = const()[name = string("op_65"), val = int32(1)];
 
80
  int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
81
  int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
82
  bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
83
+ tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = base_model_roberta_embeddings_word_embeddings_weight_quantized)[name = string("inputs_embeds")];
84
+ tensor<fp32, [1, 512, 768]> token_type_embeddings_1_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240467456))), scale = tensor<fp32, [1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240860736))))[name = string("token_type_embeddings_1_quantized")];
85
+ tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1_quantized)[name = string("embeddings_1")];
86
  int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
87
  int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
88
  bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
89
+ tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x = base_model_roberta_embeddings_position_embeddings_weight_quantized)[name = string("position_embeddings_1")];
90
  tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
91
  tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
92
  tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
93
+ tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight = base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized, x = input_7)[name = string("linear_0")];
94
  tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
95
  tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
96
+ tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight = base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized, x = input_7)[name = string("linear_1")];
97
  tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
98
  tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
99
+ tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight = base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized, x = input_7)[name = string("linear_2")];
100
  tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
101
  tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
102
  tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
 
110
  tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
111
  tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
112
  tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
113
+ tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized, x = input_9)[name = string("linear_3")];
114
  tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
115
  tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
116
  tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
117
+ tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized, x = input_15)[name = string("linear_4")];
118
  string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
119
  tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
120
+ tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight = base_model_roberta_encoder_layer_0_output_dense_weight_quantized, x = input_19)[name = string("linear_5")];
121
  tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
122
  tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
123
  tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
124
+ tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight = base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized, x = hidden_states_7)[name = string("linear_6")];
125
  tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
126
  tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
127
+ tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight = base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized, x = hidden_states_7)[name = string("linear_7")];
128
  tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
129
  tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
130
+ tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight = base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized, x = hidden_states_7)[name = string("linear_8")];
131
  tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
132
  tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
133
  tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
 
141
  tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
142
  tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
143
  tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
144
+ tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized, x = input_25)[name = string("linear_9")];
145
  tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
146
  tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
147
  tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
148
+ tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized, x = input_31)[name = string("linear_10")];
149
  string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
150
  tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
151
+ tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight = base_model_roberta_encoder_layer_1_output_dense_weight_quantized, x = input_35)[name = string("linear_11")];
152
  tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
153
  tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
154
  tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
155
+ tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight = base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized, x = hidden_states_13)[name = string("linear_12")];
156
  tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
157
  tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
158
+ tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight = base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized, x = hidden_states_13)[name = string("linear_13")];
159
  tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
160
  tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
161
+ tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight = base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized, x = hidden_states_13)[name = string("linear_14")];
162
  tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
163
  tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
164
  tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
 
172
  tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
173
  tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
174
  tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
175
+ tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized, x = input_41)[name = string("linear_15")];
176
  tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
177
  tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
178
  tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
179
+ tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized, x = input_47)[name = string("linear_16")];
180
  string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
181
  tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
182
+ tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight = base_model_roberta_encoder_layer_2_output_dense_weight_quantized, x = input_51)[name = string("linear_17")];
183
  tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
184
  tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
185
  tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];
SaT.mlmodelc/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7690c08745205045ab57378cffcf8933e8b10f7471517f6ab7d43ae21ab9ea5a
3
- size 856225216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b5d724996325ea649fad4b140360e29a08a1eb6ecdb4329455a55bd6973c59
3
+ size 240860864