Sachin Desai committed on
Commit ·
87f6e26
1
Parent(s): c4a5e6d
quantized model
Browse files
- SaT.mlmodelc/analytics/coremldata.bin +1 -1
- SaT.mlmodelc/coremldata.bin +1 -1
- SaT.mlmodelc/metadata.json +5 -4
- SaT.mlmodelc/model.mil +78 -78
- SaT.mlmodelc/weights/weight.bin +2 -2
SaT.mlmodelc/analytics/coremldata.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 241
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f64f977399bfea6387639b0aa47c328e1ccb5c72ec192fafebe6a6a037482aa1
|
| 3 |
size 241
|
SaT.mlmodelc/coremldata.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 347
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f7420b0c2ff587bdc15eb8b95e6adbc1d3ef598a76984e5cf6635be9344da29
|
| 3 |
size 347
|
SaT.mlmodelc/metadata.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
"metadataOutputVersion" : "3.0",
|
| 4 |
-
"storagePrecision" : "Float32",
|
| 5 |
"outputSchema" : [
|
| 6 |
{
|
| 7 |
"hasShapeFlexibility" : "0",
|
|
@@ -29,9 +29,10 @@
|
|
| 29 |
"Ios16.cumsum" : 1,
|
| 30 |
"Ios18.add" : 9,
|
| 31 |
"Ios18.layerNorm" : 7,
|
| 32 |
-
"Ios18.transpose" : 12,
|
| 33 |
"Ios18.cast" : 2,
|
|
|
|
| 34 |
"Ios18.reshape" : 12,
|
|
|
|
| 35 |
"Ios18.mul" : 2
|
| 36 |
},
|
| 37 |
"computePrecision" : "Mixed (Float32, Int32)",
|
|
@@ -51,9 +52,9 @@
|
|
| 51 |
"name" : "MLModelType_mlProgram"
|
| 52 |
},
|
| 53 |
"userDefinedMetadata" : {
|
| 54 |
-
"com.github.apple.coremltools.version" : "8.3.0",
|
| 55 |
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 56 |
-
"com.github.apple.coremltools.source" : "torch==2.9.0"
|
|
|
|
| 57 |
},
|
| 58 |
"inputSchema" : [
|
| 59 |
{
|
|
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Mixed (Float32, Int8)",
|
| 5 |
"outputSchema" : [
|
| 6 |
{
|
| 7 |
"hasShapeFlexibility" : "0",
|
|
|
|
| 29 |
"Ios16.cumsum" : 1,
|
| 30 |
"Ios18.add" : 9,
|
| 31 |
"Ios18.layerNorm" : 7,
|
|
|
|
| 32 |
"Ios18.cast" : 2,
|
| 33 |
+
"Ios18.transpose" : 12,
|
| 34 |
"Ios18.reshape" : 12,
|
| 35 |
+
"Ios18.constexprBlockwiseShiftScale" : 21,
|
| 36 |
"Ios18.mul" : 2
|
| 37 |
},
|
| 38 |
"computePrecision" : "Mixed (Float32, Int32)",
|
|
|
|
| 52 |
"name" : "MLModelType_mlProgram"
|
| 53 |
},
|
| 54 |
"userDefinedMetadata" : {
|
|
|
|
| 55 |
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 56 |
+
"com.github.apple.coremltools.source" : "torch==2.9.0",
|
| 57 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
| 58 |
},
|
| 59 |
"inputSchema" : [
|
| 60 |
{
|
SaT.mlmodelc/model.mil
CHANGED
|
@@ -1,61 +1,61 @@
|
|
| 1 |
program(1.3)
|
| 2 |
-
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}
|
| 3 |
{
|
| 4 |
func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
|
| 5 |
-
tensor<fp32, [250002, 768]>
|
| 6 |
-
tensor<fp32, [514, 768]>
|
| 7 |
-
tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 8 |
-
tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 9 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 10 |
-
tensor<fp32, [768, 768]>
|
| 11 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 12 |
-
tensor<fp32, [768, 768]>
|
| 13 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 14 |
-
tensor<fp32, [768, 768]>
|
| 15 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 16 |
-
tensor<fp32, [768, 768]>
|
| 17 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 18 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 19 |
-
tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 20 |
-
tensor<fp32, [3072, 768]>
|
| 21 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 22 |
-
tensor<fp32, [768, 3072]>
|
| 23 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 24 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 25 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 26 |
-
tensor<fp32, [768, 768]>
|
| 27 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 28 |
-
tensor<fp32, [768, 768]>
|
| 29 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 30 |
-
tensor<fp32, [768, 768]>
|
| 31 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 32 |
-
tensor<fp32, [768, 768]>
|
| 33 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 34 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 35 |
-
tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 36 |
-
tensor<fp32, [3072, 768]>
|
| 37 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 38 |
-
tensor<fp32, [768, 3072]>
|
| 39 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 40 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 41 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 42 |
-
tensor<fp32, [768, 768]>
|
| 43 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 44 |
-
tensor<fp32, [768, 768]>
|
| 45 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 46 |
-
tensor<fp32, [768, 768]>
|
| 47 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 48 |
-
tensor<fp32, [768, 768]>
|
| 49 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 50 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 51 |
-
tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 52 |
-
tensor<fp32, [3072, 768]>
|
| 53 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 54 |
-
tensor<fp32, [768, 3072]>
|
| 55 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 56 |
-
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 57 |
tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
|
| 58 |
-
tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(
|
| 59 |
int32 var_15 = const()[name = string("op_15"), val = int32(1)];
|
| 60 |
fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
|
| 61 |
fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
|
|
@@ -64,7 +64,7 @@ program(1.3)
|
|
| 64 |
tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
|
| 65 |
tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
|
| 66 |
string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
|
| 67 |
-
tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("
|
| 68 |
tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
|
| 69 |
fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
|
| 70 |
tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
|
|
@@ -72,7 +72,7 @@ program(1.3)
|
|
| 72 |
string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
|
| 73 |
bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
|
| 74 |
bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
|
| 75 |
-
tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("
|
| 76 |
tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
|
| 77 |
tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
|
| 78 |
int32 var_65 = const()[name = string("op_65"), val = int32(1)];
|
|
@@ -80,23 +80,23 @@ program(1.3)
|
|
| 80 |
int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
|
| 81 |
int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
|
| 82 |
bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
|
| 83 |
-
tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x =
|
| 84 |
-
tensor<fp32, [1, 512, 768]>
|
| 85 |
-
tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y =
|
| 86 |
int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
|
| 87 |
int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
|
| 88 |
bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
|
| 89 |
-
tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x =
|
| 90 |
tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
|
| 91 |
tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 92 |
tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
|
| 93 |
-
tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight =
|
| 94 |
tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 95 |
tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
|
| 96 |
-
tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight =
|
| 97 |
tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 98 |
tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
|
| 99 |
-
tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight =
|
| 100 |
tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 101 |
tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
|
| 102 |
tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
@@ -110,24 +110,24 @@ program(1.3)
|
|
| 110 |
tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 111 |
tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
|
| 112 |
tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
|
| 113 |
-
tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight =
|
| 114 |
tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
|
| 115 |
tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 116 |
tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
|
| 117 |
-
tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight =
|
| 118 |
string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
|
| 119 |
tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
|
| 120 |
-
tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight =
|
| 121 |
tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
|
| 122 |
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 123 |
tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
|
| 124 |
-
tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight =
|
| 125 |
tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 126 |
tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
|
| 127 |
-
tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight =
|
| 128 |
tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 129 |
tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
|
| 130 |
-
tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight =
|
| 131 |
tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 132 |
tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
|
| 133 |
tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
@@ -141,24 +141,24 @@ program(1.3)
|
|
| 141 |
tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 142 |
tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
|
| 143 |
tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
|
| 144 |
-
tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight =
|
| 145 |
tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
|
| 146 |
tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 147 |
tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
|
| 148 |
-
tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight =
|
| 149 |
string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
|
| 150 |
tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
|
| 151 |
-
tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight =
|
| 152 |
tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
|
| 153 |
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 154 |
tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
|
| 155 |
-
tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight =
|
| 156 |
tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 157 |
tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
|
| 158 |
-
tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight =
|
| 159 |
tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 160 |
tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
|
| 161 |
-
tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight =
|
| 162 |
tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 163 |
tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
|
| 164 |
tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
@@ -172,14 +172,14 @@ program(1.3)
|
|
| 172 |
tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 173 |
tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
|
| 174 |
tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
|
| 175 |
-
tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight =
|
| 176 |
tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
|
| 177 |
tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 178 |
tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
|
| 179 |
-
tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight =
|
| 180 |
string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
|
| 181 |
tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
|
| 182 |
-
tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight =
|
| 183 |
tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
|
| 184 |
tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 185 |
tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];
|
|
|
|
| 1 |
program(1.3)
|
| 2 |
+
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
|
| 3 |
{
|
| 4 |
func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
|
| 5 |
+
tensor<fp32, [250002, 768]> base_model_roberta_embeddings_word_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp32, [250002, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192001664))))[name = string("base_model_roberta_embeddings_word_embeddings_weight_quantized")];
|
| 6 |
+
tensor<fp32, [514, 768]> base_model_roberta_embeddings_position_embeddings_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216001920))), scale = tensor<fp32, [514, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216396736))))[name = string("base_model_roberta_embeddings_position_embeddings_weight_quantized")];
|
| 7 |
+
tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_bias = const()[name = string("base_model_roberta_embeddings_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216446144)))];
|
| 8 |
+
tensor<fp32, [768]> base_model_roberta_embeddings_LayerNorm_weight = const()[name = string("base_model_roberta_embeddings_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216449280)))];
|
| 9 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216452416)))];
|
| 10 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216455552))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217045440))))[name = string("base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized")];
|
| 11 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217119232)))];
|
| 12 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217122368))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217712256))))[name = string("base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized")];
|
| 13 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217786048)))];
|
| 14 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217789184))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218379072))))[name = string("base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized")];
|
| 15 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218452864)))];
|
| 16 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218456000))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219045888))))[name = string("base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized")];
|
| 17 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219119680)))];
|
| 18 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219122816)))];
|
| 19 |
+
tensor<fp32, [3072]> base_model_roberta_encoder_layer_0_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219125952)))];
|
| 20 |
+
tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219138304))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221497664))))[name = string("base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized")];
|
| 21 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221792640)))];
|
| 22 |
+
tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_0_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221795776))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224155136))))[name = string("base_model_roberta_encoder_layer_0_output_dense_weight_quantized")];
|
| 23 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224450112)))];
|
| 24 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_0_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_0_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224453248)))];
|
| 25 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224456384)))];
|
| 26 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224459520))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225049408))))[name = string("base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized")];
|
| 27 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225123200)))];
|
| 28 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225126336))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225716224))))[name = string("base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized")];
|
| 29 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225790016)))];
|
| 30 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225793152))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226383040))))[name = string("base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized")];
|
| 31 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226456832)))];
|
| 32 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226459968))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227049856))))[name = string("base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized")];
|
| 33 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227123648)))];
|
| 34 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227126784)))];
|
| 35 |
+
tensor<fp32, [3072]> base_model_roberta_encoder_layer_1_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227129920)))];
|
| 36 |
+
tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227142272))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229501632))))[name = string("base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized")];
|
| 37 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229796608)))];
|
| 38 |
+
tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_1_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229799744))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232159104))))[name = string("base_model_roberta_encoder_layer_1_output_dense_weight_quantized")];
|
| 39 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232454080)))];
|
| 40 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_1_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_1_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232457216)))];
|
| 41 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_query_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_query_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232460352)))];
|
| 42 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232463488))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233053376))))[name = string("base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized")];
|
| 43 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_key_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_key_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233127168)))];
|
| 44 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233130304))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233720192))))[name = string("base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized")];
|
| 45 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_self_value_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_self_value_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233793984)))];
|
| 46 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233797120))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234387008))))[name = string("base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized")];
|
| 47 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234460800)))];
|
| 48 |
+
tensor<fp32, [768, 768]> base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234463936))), scale = tensor<fp32, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235053824))))[name = string("base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized")];
|
| 49 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235127616)))];
|
| 50 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235130752)))];
|
| 51 |
+
tensor<fp32, [3072]> base_model_roberta_encoder_layer_2_intermediate_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_bias"), val = tensor<fp32, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235133888)))];
|
| 52 |
+
tensor<fp32, [3072, 768]> base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235146240))), scale = tensor<fp32, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237505600))))[name = string("base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized")];
|
| 53 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_dense_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_dense_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237800576)))];
|
| 54 |
+
tensor<fp32, [768, 3072]> base_model_roberta_encoder_layer_2_output_dense_weight_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237803712))), scale = tensor<fp32, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240163072))))[name = string("base_model_roberta_encoder_layer_2_output_dense_weight_quantized")];
|
| 55 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_bias = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_bias"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240458048)))];
|
| 56 |
+
tensor<fp32, [768]> base_model_roberta_encoder_layer_2_output_LayerNorm_weight = const()[name = string("base_model_roberta_encoder_layer_2_output_LayerNorm_weight"), val = tensor<fp32, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240461184)))];
|
| 57 |
tensor<fp32, [1]> base_model_classifier_bias = const()[name = string("base_model_classifier_bias"), val = tensor<fp32, [1]>([0x1.679ac8p-12])];
|
| 58 |
+
tensor<fp32, [1, 768]> base_model_classifier_weight = const()[name = string("base_model_classifier_weight"), val = tensor<fp32, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240464320)))];
|
| 59 |
int32 var_15 = const()[name = string("op_15"), val = int32(1)];
|
| 60 |
fp32 var_22 = const()[name = string("op_22"), val = fp32(0x1p+0)];
|
| 61 |
fp32 var_25 = const()[name = string("op_25"), val = fp32(0x1.4f8b58p-17)];
|
|
|
|
| 64 |
tensor<int32, [1]> var_47_axes_0 = const()[name = string("op_47_axes_0"), val = tensor<int32, [1]>([2])];
|
| 65 |
tensor<int32, [1, 1, 1, 512]> var_47 = expand_dims(axes = var_47_axes_0, x = var_46)[name = string("op_47")];
|
| 66 |
string cast_2_dtype_0 = const()[name = string("cast_2_dtype_0"), val = string("fp32")];
|
| 67 |
+
tensor<fp32, [1, 1, 1, 512]> cast_2 = cast(dtype = cast_2_dtype_0, x = var_47)[name = string("cast_1")];
|
| 68 |
tensor<fp32, [1, 1, 1, 512]> var_50 = sub(x = var_22, y = cast_2)[name = string("op_50")];
|
| 69 |
fp32 var_51 = const()[name = string("op_51"), val = fp32(-0x1.fffffep+127)];
|
| 70 |
tensor<fp32, [1, 1, 1, 512]> attention_mask_1 = mul(x = var_50, y = var_51)[name = string("attention_mask")];
|
|
|
|
| 72 |
string cast_3_dtype_0 = const()[name = string("cast_3_dtype_0"), val = string("int32")];
|
| 73 |
bool var_59_exclusive_0 = const()[name = string("op_59_exclusive_0"), val = bool(false)];
|
| 74 |
bool var_59_reverse_0 = const()[name = string("op_59_reverse_0"), val = bool(false)];
|
| 75 |
+
tensor<int32, [1, 512]> cast_3 = cast(dtype = cast_3_dtype_0, x = var_57)[name = string("cast_0")];
|
| 76 |
tensor<int32, [1, 512]> var_59 = cumsum(axis = var_15, exclusive = var_59_exclusive_0, reverse = var_59_reverse_0, x = cast_3)[name = string("op_59")];
|
| 77 |
tensor<int32, [1, 512]> incremental_indices = mul(x = var_59, y = cast_3)[name = string("incremental_indices")];
|
| 78 |
int32 var_65 = const()[name = string("op_65"), val = int32(1)];
|
|
|
|
| 80 |
int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)];
|
| 81 |
int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
|
| 82 |
bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
|
| 83 |
+
tensor<fp32, [1, 512, 768]> inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = base_model_roberta_embeddings_word_embeddings_weight_quantized)[name = string("inputs_embeds")];
|
| 84 |
+
tensor<fp32, [1, 512, 768]> token_type_embeddings_1_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240467456))), scale = tensor<fp32, [1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240860736))))[name = string("token_type_embeddings_1_quantized")];
|
| 85 |
+
tensor<fp32, [1, 512, 768]> embeddings_1 = add(x = inputs_embeds, y = token_type_embeddings_1_quantized)[name = string("embeddings_1")];
|
| 86 |
int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
|
| 87 |
int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
|
| 88 |
bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
|
| 89 |
+
tensor<fp32, [1, 512, 768]> position_embeddings_1 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3, validate_indices = position_embeddings_1_validate_indices_0, x = base_model_roberta_embeddings_position_embeddings_weight_quantized)[name = string("position_embeddings_1")];
|
| 90 |
tensor<fp32, [1, 512, 768]> input_5 = add(x = embeddings_1, y = position_embeddings_1)[name = string("input_5")];
|
| 91 |
tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 92 |
tensor<fp32, [1, 512, 768]> input_7 = layer_norm(axes = input_7_axes_0, beta = base_model_roberta_embeddings_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_embeddings_LayerNorm_weight, x = input_5)[name = string("input_7")];
|
| 93 |
+
tensor<fp32, [1, 512, 768]> x_1 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_query_bias, weight = base_model_roberta_encoder_layer_0_attention_self_query_weight_quantized, x = input_7)[name = string("linear_0")];
|
| 94 |
tensor<int32, [4]> var_101 = const()[name = string("op_101"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 95 |
tensor<fp32, [1, 512, 12, 64]> x_3 = reshape(shape = var_101, x = x_1)[name = string("x_3")];
|
| 96 |
+
tensor<fp32, [1, 512, 768]> x_5 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_key_bias, weight = base_model_roberta_encoder_layer_0_attention_self_key_weight_quantized, x = input_7)[name = string("linear_1")];
|
| 97 |
tensor<int32, [4]> var_110 = const()[name = string("op_110"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 98 |
tensor<fp32, [1, 512, 12, 64]> x_7 = reshape(shape = var_110, x = x_5)[name = string("x_7")];
|
| 99 |
+
tensor<fp32, [1, 512, 768]> x_9 = linear(bias = base_model_roberta_encoder_layer_0_attention_self_value_bias, weight = base_model_roberta_encoder_layer_0_attention_self_value_weight_quantized, x = input_7)[name = string("linear_2")];
|
| 100 |
tensor<int32, [4]> var_119 = const()[name = string("op_119"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 101 |
tensor<fp32, [1, 512, 12, 64]> x_11 = reshape(shape = var_119, x = x_9)[name = string("x_11")];
|
| 102 |
tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
|
|
| 110 |
tensor<int32, [3]> var_125 = const()[name = string("op_125"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 111 |
tensor<fp32, [1, 512, 12, 64]> attn_output_3 = transpose(perm = attn_output_3_perm_0, x = attn_output_1)[name = string("transpose_35")];
|
| 112 |
tensor<fp32, [1, 512, 768]> input_9 = reshape(shape = var_125, x = attn_output_3)[name = string("input_9")];
|
| 113 |
+
tensor<fp32, [1, 512, 768]> input_11 = linear(bias = base_model_roberta_encoder_layer_0_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_0_attention_output_dense_weight_quantized, x = input_9)[name = string("linear_3")];
|
| 114 |
tensor<fp32, [1, 512, 768]> input_13 = add(x = input_11, y = input_7)[name = string("input_13")];
|
| 115 |
tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 116 |
tensor<fp32, [1, 512, 768]> input_15 = layer_norm(axes = input_15_axes_0, beta = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_attention_output_LayerNorm_weight, x = input_13)[name = string("input_15")];
|
| 117 |
+
tensor<fp32, [1, 512, 3072]> input_17 = linear(bias = base_model_roberta_encoder_layer_0_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_0_intermediate_dense_weight_quantized, x = input_15)[name = string("linear_4")];
|
| 118 |
string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
|
| 119 |
tensor<fp32, [1, 512, 3072]> input_19 = gelu(mode = input_19_mode_0, x = input_17)[name = string("input_19")];
|
| 120 |
+
tensor<fp32, [1, 512, 768]> input_21 = linear(bias = base_model_roberta_encoder_layer_0_output_dense_bias, weight = base_model_roberta_encoder_layer_0_output_dense_weight_quantized, x = input_19)[name = string("linear_5")];
|
| 121 |
tensor<fp32, [1, 512, 768]> input_23 = add(x = input_21, y = input_15)[name = string("input_23")];
|
| 122 |
tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 123 |
tensor<fp32, [1, 512, 768]> hidden_states_7 = layer_norm(axes = hidden_states_7_axes_0, beta = base_model_roberta_encoder_layer_0_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_0_output_LayerNorm_weight, x = input_23)[name = string("hidden_states_7")];
|
| 124 |
+
tensor<fp32, [1, 512, 768]> x_13 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_query_bias, weight = base_model_roberta_encoder_layer_1_attention_self_query_weight_quantized, x = hidden_states_7)[name = string("linear_6")];
|
| 125 |
tensor<int32, [4]> var_169 = const()[name = string("op_169"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 126 |
tensor<fp32, [1, 512, 12, 64]> x_15 = reshape(shape = var_169, x = x_13)[name = string("x_15")];
|
| 127 |
+
tensor<fp32, [1, 512, 768]> x_17 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_key_bias, weight = base_model_roberta_encoder_layer_1_attention_self_key_weight_quantized, x = hidden_states_7)[name = string("linear_7")];
|
| 128 |
tensor<int32, [4]> var_178 = const()[name = string("op_178"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 129 |
tensor<fp32, [1, 512, 12, 64]> x_19 = reshape(shape = var_178, x = x_17)[name = string("x_19")];
|
| 130 |
+
tensor<fp32, [1, 512, 768]> x_21 = linear(bias = base_model_roberta_encoder_layer_1_attention_self_value_bias, weight = base_model_roberta_encoder_layer_1_attention_self_value_weight_quantized, x = hidden_states_7)[name = string("linear_8")];
|
| 131 |
tensor<int32, [4]> var_187 = const()[name = string("op_187"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 132 |
tensor<fp32, [1, 512, 12, 64]> x_23 = reshape(shape = var_187, x = x_21)[name = string("x_23")];
|
| 133 |
tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
|
|
| 141 |
tensor<int32, [3]> var_193 = const()[name = string("op_193"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 142 |
tensor<fp32, [1, 512, 12, 64]> attn_output_7 = transpose(perm = attn_output_7_perm_0, x = attn_output_5)[name = string("transpose_31")];
|
| 143 |
tensor<fp32, [1, 512, 768]> input_25 = reshape(shape = var_193, x = attn_output_7)[name = string("input_25")];
|
| 144 |
+
tensor<fp32, [1, 512, 768]> input_27 = linear(bias = base_model_roberta_encoder_layer_1_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_1_attention_output_dense_weight_quantized, x = input_25)[name = string("linear_9")];
|
| 145 |
tensor<fp32, [1, 512, 768]> input_29 = add(x = input_27, y = hidden_states_7)[name = string("input_29")];
|
| 146 |
tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 147 |
tensor<fp32, [1, 512, 768]> input_31 = layer_norm(axes = input_31_axes_0, beta = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_attention_output_LayerNorm_weight, x = input_29)[name = string("input_31")];
|
| 148 |
+
tensor<fp32, [1, 512, 3072]> input_33 = linear(bias = base_model_roberta_encoder_layer_1_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_1_intermediate_dense_weight_quantized, x = input_31)[name = string("linear_10")];
|
| 149 |
string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
|
| 150 |
tensor<fp32, [1, 512, 3072]> input_35 = gelu(mode = input_35_mode_0, x = input_33)[name = string("input_35")];
|
| 151 |
+
tensor<fp32, [1, 512, 768]> input_37 = linear(bias = base_model_roberta_encoder_layer_1_output_dense_bias, weight = base_model_roberta_encoder_layer_1_output_dense_weight_quantized, x = input_35)[name = string("linear_11")];
|
| 152 |
tensor<fp32, [1, 512, 768]> input_39 = add(x = input_37, y = input_31)[name = string("input_39")];
|
| 153 |
tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 154 |
tensor<fp32, [1, 512, 768]> hidden_states_13 = layer_norm(axes = hidden_states_13_axes_0, beta = base_model_roberta_encoder_layer_1_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_1_output_LayerNorm_weight, x = input_39)[name = string("hidden_states_13")];
|
| 155 |
+
tensor<fp32, [1, 512, 768]> x_25 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_query_bias, weight = base_model_roberta_encoder_layer_2_attention_self_query_weight_quantized, x = hidden_states_13)[name = string("linear_12")];
|
| 156 |
tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 157 |
tensor<fp32, [1, 512, 12, 64]> x_27 = reshape(shape = var_237, x = x_25)[name = string("x_27")];
|
| 158 |
+
tensor<fp32, [1, 512, 768]> x_29 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_key_bias, weight = base_model_roberta_encoder_layer_2_attention_self_key_weight_quantized, x = hidden_states_13)[name = string("linear_13")];
|
| 159 |
tensor<int32, [4]> var_246 = const()[name = string("op_246"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 160 |
tensor<fp32, [1, 512, 12, 64]> x_31 = reshape(shape = var_246, x = x_29)[name = string("x_31")];
|
| 161 |
+
tensor<fp32, [1, 512, 768]> x_33 = linear(bias = base_model_roberta_encoder_layer_2_attention_self_value_bias, weight = base_model_roberta_encoder_layer_2_attention_self_value_weight_quantized, x = hidden_states_13)[name = string("linear_14")];
|
| 162 |
tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 512, 12, 64])];
|
| 163 |
tensor<fp32, [1, 512, 12, 64]> x = reshape(shape = var_255, x = x_33)[name = string("x")];
|
| 164 |
tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
|
|
|
| 172 |
tensor<int32, [3]> var_261 = const()[name = string("op_261"), val = tensor<int32, [3]>([1, 512, 768])];
|
| 173 |
tensor<fp32, [1, 512, 12, 64]> attn_output = transpose(perm = attn_output_perm_0, x = attn_output_9)[name = string("transpose_27")];
|
| 174 |
tensor<fp32, [1, 512, 768]> input_41 = reshape(shape = var_261, x = attn_output)[name = string("input_41")];
|
| 175 |
+
tensor<fp32, [1, 512, 768]> input_43 = linear(bias = base_model_roberta_encoder_layer_2_attention_output_dense_bias, weight = base_model_roberta_encoder_layer_2_attention_output_dense_weight_quantized, x = input_41)[name = string("linear_15")];
|
| 176 |
tensor<fp32, [1, 512, 768]> input_45 = add(x = input_43, y = hidden_states_13)[name = string("input_45")];
|
| 177 |
tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 178 |
tensor<fp32, [1, 512, 768]> input_47 = layer_norm(axes = input_47_axes_0, beta = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_attention_output_LayerNorm_weight, x = input_45)[name = string("input_47")];
|
| 179 |
+
tensor<fp32, [1, 512, 3072]> input_49 = linear(bias = base_model_roberta_encoder_layer_2_intermediate_dense_bias, weight = base_model_roberta_encoder_layer_2_intermediate_dense_weight_quantized, x = input_47)[name = string("linear_16")];
|
| 180 |
string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
|
| 181 |
tensor<fp32, [1, 512, 3072]> input_51 = gelu(mode = input_51_mode_0, x = input_49)[name = string("input_51")];
|
| 182 |
+
tensor<fp32, [1, 512, 768]> input_53 = linear(bias = base_model_roberta_encoder_layer_2_output_dense_bias, weight = base_model_roberta_encoder_layer_2_output_dense_weight_quantized, x = input_51)[name = string("linear_17")];
|
| 183 |
tensor<fp32, [1, 512, 768]> input_55 = add(x = input_53, y = input_47)[name = string("input_55")];
|
| 184 |
tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 185 |
tensor<fp32, [1, 512, 768]> input_57 = layer_norm(axes = input_57_axes_0, beta = base_model_roberta_encoder_layer_2_output_LayerNorm_bias, epsilon = var_25, gamma = base_model_roberta_encoder_layer_2_output_LayerNorm_weight, x = input_55)[name = string("input_57")];
|
SaT.mlmodelc/weights/weight.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08b5d724996325ea649fad4b140360e29a08a1eb6ecdb4329455a55bd6973c59
|
| 3 |
+
size 240860864
|