Sachin Desai committed on
Commit
92b2959
·
1 Parent(s): 8742cdc

remove individual files

Browse files
Files changed (5) hide show
  1. analytics/coremldata.bin +0 -3
  2. coremldata.bin +0 -3
  3. metadata.json +0 -91
  4. model.mil +0 -204
  5. weights/weight.bin +0 -3
analytics/coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3dfa7b616cecfd9db758ddd58cc951de5f291b7e85da39107f342a59515163f
3
- size 241
 
 
 
 
coremldata.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2155db28e8ab643d8179098242a8e33dc0634d3e005a6bd033ab81889472aa3
3
- size 459
 
 
 
 
metadata.json DELETED
@@ -1,91 +0,0 @@
1
- [
2
- {
3
- "shortDescription" : "A model that segments any text",
4
- "metadataOutputVersion" : "3.0",
5
- "outputSchema" : [
6
- {
7
- "hasShapeFlexibility" : "0",
8
- "isOptional" : "0",
9
- "dataType" : "Float16",
10
- "formattedType" : "MultiArray (Float16 1 × 512 × 1)",
11
- "shortDescription" : "",
12
- "shape" : "[1, 512, 1]",
13
- "name" : "output",
14
- "type" : "MultiArray"
15
- }
16
- ],
17
- "version" : "1",
18
- "modelParameters" : [
19
-
20
- ],
21
- "author" : "Salesforce Inc",
22
- "specificationVersion" : 9,
23
- "storagePrecision" : "Mixed (Float16, Int8)",
24
- "mlProgramOperationTypeHistogram" : {
25
- "Ios18.linear" : 19,
26
- "Ios18.notEqual" : 1,
27
- "Ios18.scaledDotProductAttention" : 3,
28
- "Ios18.expandDims" : 2,
29
- "Select" : 3,
30
- "Ios18.sub" : 1,
31
- "Ios18.gelu" : 3,
32
- "Ios18.gather" : 2,
33
- "Ios16.cumsum" : 1,
34
- "Ios18.add" : 11,
35
- "Tile" : 1,
36
- "Ios18.layerNorm" : 7,
37
- "Ios18.cast" : 4,
38
- "Ios18.transpose" : 12,
39
- "Ios18.constexprBlockwiseShiftScale" : 21,
40
- "Ios18.greaterEqual" : 2,
41
- "Ios18.reshape" : 12,
42
- "Ios18.mul" : 1
43
- },
44
- "computePrecision" : "Mixed (Float16, Int32, UInt16)",
45
- "stateSchema" : [
46
-
47
- ],
48
- "isUpdatable" : "0",
49
- "availability" : {
50
- "macOS" : "15.0",
51
- "tvOS" : "18.0",
52
- "visionOS" : "2.0",
53
- "watchOS" : "11.0",
54
- "iOS" : "18.0",
55
- "macCatalyst" : "18.0"
56
- },
57
- "modelType" : {
58
- "name" : "MLModelType_mlProgram"
59
- },
60
- "inputSchema" : [
61
- {
62
- "hasShapeFlexibility" : "0",
63
- "isOptional" : "0",
64
- "dataType" : "Int32",
65
- "formattedType" : "MultiArray (Int32 1 × 512)",
66
- "shortDescription" : "",
67
- "shape" : "[1, 512]",
68
- "name" : "input_ids",
69
- "type" : "MultiArray"
70
- },
71
- {
72
- "hasShapeFlexibility" : "0",
73
- "isOptional" : "0",
74
- "dataType" : "Int32",
75
- "formattedType" : "MultiArray (Int32 1 × 512)",
76
- "shortDescription" : "",
77
- "shape" : "[1, 512]",
78
- "name" : "attention_mask",
79
- "type" : "MultiArray"
80
- }
81
- ],
82
- "userDefinedMetadata" : {
83
- "com.github.apple.coremltools.conversion_date" : "2025-10-08",
84
- "com.github.apple.coremltools.source" : "torch==2.7.1",
85
- "com.github.apple.coremltools.version" : "9.0b1",
86
- "com.github.apple.coremltools.source_dialect" : "TorchScript"
87
- },
88
- "generatedClassName" : "SaT",
89
- "method" : "predict"
90
- }
91
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model.mil DELETED
@@ -1,204 +0,0 @@
1
- program(1.3)
2
- [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}})]
3
- {
4
- func main<ios18>(tensor<int32, [1, 512]> attention_mask, tensor<int32, [1, 512]> input_ids) {
5
- int32 var_25 = const()[name = string("op_25"), val = int32(1)];
6
- tensor<bool, [1, 512]> var_41 = not_equal(x = input_ids, y = var_25)[name = string("op_41")];
7
- string mask_dtype_0 = const()[name = string("mask_dtype_0"), val = string("int32")];
8
- bool var_43_exclusive_0 = const()[name = string("op_43_exclusive_0"), val = bool(false)];
9
- bool var_43_reverse_0 = const()[name = string("op_43_reverse_0"), val = bool(false)];
10
- tensor<int32, [1, 512]> mask = cast(dtype = mask_dtype_0, x = var_41)[name = string("cast_3")];
11
- tensor<int32, [1, 512]> var_43 = cumsum(axis = var_25, exclusive = var_43_exclusive_0, reverse = var_43_reverse_0, x = mask)[name = string("op_43")];
12
- tensor<int32, [1, 512]> incremental_indices = mul(x = var_43, y = mask)[name = string("incremental_indices")];
13
- int32 var_49 = const()[name = string("op_49"), val = int32(1)];
14
- tensor<int32, [1, 512]> input_3 = add(x = incremental_indices, y = var_49)[name = string("input_3")];
15
- int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)];
16
- bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)];
17
- tensor<fp16, [250002, 768]> roberta_embeddings_word_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [250002, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor<fp16, [250002, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192001664))))[name = string("roberta_embeddings_word_embeddings_weight_to_fp16_quantized")];
18
- int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
19
- tensor<bool, [1, 512]> greater_equal_0 = greater_equal(x = input_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
20
- int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(250002)];
21
- tensor<int32, [1, 512]> add_0 = add(x = input_ids, y = slice_by_index_0)[name = string("add_0")];
22
- tensor<int32, [1, 512]> select_0 = select(a = input_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
23
- int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)];
24
- tensor<bool, [1, 512]> greater_equal_0_1 = greater_equal(x = select_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")];
25
- int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(250002)];
26
- tensor<int32, [1, 512]> add_0_1 = add(x = select_0, y = slice_by_index_0_1)[name = string("add_0_1")];
27
- tensor<int32, [1, 512]> select_0_1 = select(a = select_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")];
28
- int32 inputs_embeds_cast_fp16_axis_0 = const()[name = string("inputs_embeds_cast_fp16_axis_0"), val = int32(0)];
29
- tensor<fp16, [1, 512, 768]> inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_cast_fp16_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = select_0_1, validate_indices = inputs_embeds_validate_indices_0, x = roberta_embeddings_word_embeddings_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")];
30
- tensor<fp16, [1, 512, 768]> token_type_embeddings_1_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [1, 512, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204001856))), scale = tensor<fp16, [1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204395136))))[name = string("token_type_embeddings_1_to_fp16_quantized")];
31
- tensor<fp16, [1, 512, 768]> embeddings_1_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = token_type_embeddings_1_to_fp16_quantized)[name = string("embeddings_1_cast_fp16")];
32
- int32 position_embeddings_1_axis_0 = const()[name = string("position_embeddings_1_axis_0"), val = int32(0)];
33
- int32 position_embeddings_1_batch_dims_0 = const()[name = string("position_embeddings_1_batch_dims_0"), val = int32(0)];
34
- bool position_embeddings_1_validate_indices_0 = const()[name = string("position_embeddings_1_validate_indices_0"), val = bool(false)];
35
- tensor<fp16, [514, 768]> roberta_embeddings_position_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [514, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204395264))), scale = tensor<fp16, [514, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204790080))))[name = string("roberta_embeddings_position_embeddings_weight_to_fp16_quantized")];
36
- string input_3_to_uint16_dtype_0 = const()[name = string("input_3_to_uint16_dtype_0"), val = string("uint16")];
37
- tensor<uint16, [1, 512]> input_3_to_uint16 = cast(dtype = input_3_to_uint16_dtype_0, x = input_3)[name = string("cast_2")];
38
- tensor<fp16, [1, 512, 768]> position_embeddings_1_cast_fp16_cast_uint16 = gather(axis = position_embeddings_1_axis_0, batch_dims = position_embeddings_1_batch_dims_0, indices = input_3_to_uint16, validate_indices = position_embeddings_1_validate_indices_0, x = roberta_embeddings_position_embeddings_weight_to_fp16_quantized)[name = string("position_embeddings_1_cast_fp16_cast_uint16")];
39
- tensor<fp16, [1, 512, 768]> input_5_cast_fp16 = add(x = embeddings_1_cast_fp16, y = position_embeddings_1_cast_fp16_cast_uint16)[name = string("input_5_cast_fp16")];
40
- tensor<int32, [1]> input_7_axes_0 = const()[name = string("input_7_axes_0"), val = tensor<int32, [1]>([-1])];
41
- tensor<fp16, [768]> roberta_embeddings_LayerNorm_weight_to_fp16 = const()[name = string("roberta_embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204814848)))];
42
- tensor<fp16, [768]> roberta_embeddings_LayerNorm_bias_to_fp16 = const()[name = string("roberta_embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204816448)))];
43
- fp16 var_20_to_fp16 = const()[name = string("op_20_to_fp16"), val = fp16(0x1.5p-17)];
44
- tensor<fp16, [1, 512, 768]> input_7_cast_fp16 = layer_norm(axes = input_7_axes_0, beta = roberta_embeddings_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_embeddings_LayerNorm_weight_to_fp16, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
45
- tensor<int32, [1]> var_67_axes_0 = const()[name = string("op_67_axes_0"), val = tensor<int32, [1]>([1])];
46
- tensor<int32, [1, 1, 512]> var_67 = expand_dims(axes = var_67_axes_0, x = attention_mask)[name = string("op_67")];
47
- tensor<int32, [1]> var_68_axes_0 = const()[name = string("op_68_axes_0"), val = tensor<int32, [1]>([2])];
48
- tensor<int32, [1, 1, 1, 512]> var_68 = expand_dims(axes = var_68_axes_0, x = var_67)[name = string("op_68")];
49
- tensor<int32, [4]> var_71_reps_0 = const()[name = string("op_71_reps_0"), val = tensor<int32, [4]>([1, 1, 512, 1])];
50
- tensor<int32, [1, 1, 512, 512]> var_71 = tile(reps = var_71_reps_0, x = var_68)[name = string("op_71")];
51
- fp16 const_4_to_fp16 = const()[name = string("const_4_to_fp16"), val = fp16(0x1p+0)];
52
- string expanded_mask_to_fp16_dtype_0 = const()[name = string("expanded_mask_to_fp16_dtype_0"), val = string("fp16")];
53
- tensor<fp16, [1, 1, 512, 512]> var_71_to_fp16 = cast(dtype = expanded_mask_to_fp16_dtype_0, x = var_71)[name = string("cast_1")];
54
- tensor<fp16, [1, 1, 512, 512]> inverted_mask_cast_fp16 = sub(x = const_4_to_fp16, y = var_71_to_fp16)[name = string("inverted_mask_cast_fp16")];
55
- string var_76_dtype_0 = const()[name = string("op_76_dtype_0"), val = string("bool")];
56
- fp16 var_9_to_fp16 = const()[name = string("op_9_to_fp16"), val = fp16(-inf)];
57
- tensor<bool, [1, 1, 512, 512]> inverted_mask_cast_fp16_to_bool = cast(dtype = var_76_dtype_0, x = inverted_mask_cast_fp16)[name = string("cast_0")];
58
- tensor<fp16, [1, 1, 512, 512]> attention_mask_cast_fp16 = select(a = var_9_to_fp16, b = inverted_mask_cast_fp16, cond = inverted_mask_cast_fp16_to_bool)[name = string("attention_mask_cast_fp16")];
59
- tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204818048))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205407936))))[name = string("roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized")];
60
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205444864)))];
61
- tensor<fp16, [1, 512, 768]> linear_0_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_query_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_0_cast_fp16")];
62
- tensor<int32, [4]> var_97 = const()[name = string("op_97"), val = tensor<int32, [4]>([1, -1, 12, 64])];
63
- tensor<fp16, [1, 512, 12, 64]> var_98_cast_fp16 = reshape(shape = var_97, x = linear_0_cast_fp16)[name = string("op_98_cast_fp16")];
64
- tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205446464))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206036352))))[name = string("roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized")];
65
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206073280)))];
66
- tensor<fp16, [1, 512, 768]> linear_1_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_key_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_1_cast_fp16")];
67
- tensor<int32, [4]> var_103 = const()[name = string("op_103"), val = tensor<int32, [4]>([1, -1, 12, 64])];
68
- tensor<fp16, [1, 512, 12, 64]> var_104_cast_fp16 = reshape(shape = var_103, x = linear_1_cast_fp16)[name = string("op_104_cast_fp16")];
69
- tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206074880))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206664768))))[name = string("roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized")];
70
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206701696)))];
71
- tensor<fp16, [1, 512, 768]> linear_2_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_0_attention_self_value_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_2_cast_fp16")];
72
- tensor<int32, [4]> var_109 = const()[name = string("op_109"), val = tensor<int32, [4]>([1, -1, 12, 64])];
73
- tensor<fp16, [1, 512, 12, 64]> var_110_cast_fp16 = reshape(shape = var_109, x = linear_2_cast_fp16)[name = string("op_110_cast_fp16")];
74
- tensor<int32, [4]> transpose_18_perm_0 = const()[name = string("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
75
- tensor<int32, [4]> transpose_19_perm_0 = const()[name = string("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
76
- tensor<int32, [4]> transpose_20_perm_0 = const()[name = string("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
77
- tensor<fp16, [1, 12, 512, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = var_110_cast_fp16)[name = string("transpose_36")];
78
- tensor<fp16, [1, 12, 512, 64]> transpose_19 = transpose(perm = transpose_19_perm_0, x = var_104_cast_fp16)[name = string("transpose_37")];
79
- tensor<fp16, [1, 12, 512, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = var_98_cast_fp16)[name = string("transpose_38")];
80
- tensor<fp16, [1, 12, 512, 64]> attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_19, query = transpose_18, value = transpose_20)[name = string("attn_output_1_cast_fp16")];
81
- tensor<int32, [4]> attn_output_3_perm_0 = const()[name = string("attn_output_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
82
- tensor<int32, [3]> var_114 = const()[name = string("op_114"), val = tensor<int32, [3]>([1, 512, 768])];
83
- tensor<fp16, [1, 512, 12, 64]> attn_output_3_cast_fp16 = transpose(perm = attn_output_3_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_35")];
84
- tensor<fp16, [1, 512, 768]> input_9_cast_fp16 = reshape(shape = var_114, x = attn_output_3_cast_fp16)[name = string("input_9_cast_fp16")];
85
- tensor<fp16, [768, 768]> roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206703296))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207293184))))[name = string("roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized")];
86
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207330112)))];
87
- tensor<fp16, [1, 512, 768]> linear_3_cast_fp16 = linear(bias = roberta_encoder_layer_0_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_0_attention_output_dense_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_3_cast_fp16")];
88
- tensor<fp16, [1, 512, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_7_cast_fp16)[name = string("input_13_cast_fp16")];
89
- tensor<int32, [1]> input_15_axes_0 = const()[name = string("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
90
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207331712)))];
91
- tensor<fp16, [768]> roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207333312)))];
92
- tensor<fp16, [1, 512, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = roberta_encoder_layer_0_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_0_attention_output_LayerNorm_weight_to_fp16, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")];
93
- tensor<fp16, [3072, 768]> roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207334912))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209694272))))[name = string("roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized")];
94
- tensor<fp16, [3072]> roberta_encoder_layer_0_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209841792)))];
95
- tensor<fp16, [1, 512, 3072]> linear_4_cast_fp16 = linear(bias = roberta_encoder_layer_0_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_0_intermediate_dense_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_4_cast_fp16")];
96
- string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
97
- tensor<fp16, [1, 512, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = string("input_19_cast_fp16")];
98
- tensor<fp16, [768, 3072]> roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209848000))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212207360))))[name = string("roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized")];
99
- tensor<fp16, [768]> roberta_encoder_layer_0_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212354880)))];
100
- tensor<fp16, [1, 512, 768]> linear_5_cast_fp16 = linear(bias = roberta_encoder_layer_0_output_dense_bias_to_fp16, weight = roberta_encoder_layer_0_output_dense_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_5_cast_fp16")];
101
- tensor<fp16, [1, 512, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = string("input_23_cast_fp16")];
102
- tensor<int32, [1]> hidden_states_7_axes_0 = const()[name = string("hidden_states_7_axes_0"), val = tensor<int32, [1]>([-1])];
103
- tensor<fp16, [768]> roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212356480)))];
104
- tensor<fp16, [768]> roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212358080)))];
105
- tensor<fp16, [1, 512, 768]> hidden_states_7_cast_fp16 = layer_norm(axes = hidden_states_7_axes_0, beta = roberta_encoder_layer_0_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_0_output_LayerNorm_weight_to_fp16, x = input_23_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
106
- tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212359680))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212949568))))[name = string("roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized")];
107
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212986496)))];
108
- tensor<fp16, [1, 512, 768]> linear_6_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_query_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_6_cast_fp16")];
109
- tensor<int32, [4]> var_156 = const()[name = string("op_156"), val = tensor<int32, [4]>([1, -1, 12, 64])];
110
- tensor<fp16, [1, 512, 12, 64]> var_157_cast_fp16 = reshape(shape = var_156, x = linear_6_cast_fp16)[name = string("op_157_cast_fp16")];
111
- tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212988096))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213577984))))[name = string("roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized")];
112
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213614912)))];
113
- tensor<fp16, [1, 512, 768]> linear_7_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_key_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_7_cast_fp16")];
114
- tensor<int32, [4]> var_162 = const()[name = string("op_162"), val = tensor<int32, [4]>([1, -1, 12, 64])];
115
- tensor<fp16, [1, 512, 12, 64]> var_163_cast_fp16 = reshape(shape = var_162, x = linear_7_cast_fp16)[name = string("op_163_cast_fp16")];
116
- tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(213616512))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214206400))))[name = string("roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized")];
117
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214243328)))];
118
- tensor<fp16, [1, 512, 768]> linear_8_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_1_attention_self_value_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_8_cast_fp16")];
119
- tensor<int32, [4]> var_168 = const()[name = string("op_168"), val = tensor<int32, [4]>([1, -1, 12, 64])];
120
- tensor<fp16, [1, 512, 12, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_8_cast_fp16)[name = string("op_169_cast_fp16")];
121
- tensor<int32, [4]> transpose_21_perm_0 = const()[name = string("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
122
- tensor<int32, [4]> transpose_22_perm_0 = const()[name = string("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
123
- tensor<int32, [4]> transpose_23_perm_0 = const()[name = string("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
124
- tensor<fp16, [1, 12, 512, 64]> transpose_23 = transpose(perm = transpose_23_perm_0, x = var_169_cast_fp16)[name = string("transpose_32")];
125
- tensor<fp16, [1, 12, 512, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = var_163_cast_fp16)[name = string("transpose_33")];
126
- tensor<fp16, [1, 12, 512, 64]> transpose_21 = transpose(perm = transpose_21_perm_0, x = var_157_cast_fp16)[name = string("transpose_34")];
127
- tensor<fp16, [1, 12, 512, 64]> attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_22, query = transpose_21, value = transpose_23)[name = string("attn_output_5_cast_fp16")];
128
- tensor<int32, [4]> attn_output_7_perm_0 = const()[name = string("attn_output_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
129
- tensor<int32, [3]> var_173 = const()[name = string("op_173"), val = tensor<int32, [3]>([1, 512, 768])];
130
- tensor<fp16, [1, 512, 12, 64]> attn_output_7_cast_fp16 = transpose(perm = attn_output_7_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_31")];
131
- tensor<fp16, [1, 512, 768]> input_25_cast_fp16 = reshape(shape = var_173, x = attn_output_7_cast_fp16)[name = string("input_25_cast_fp16")];
132
- tensor<fp16, [768, 768]> roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214244928))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214834816))))[name = string("roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized")];
133
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214871744)))];
134
- tensor<fp16, [1, 512, 768]> linear_9_cast_fp16 = linear(bias = roberta_encoder_layer_1_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_1_attention_output_dense_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_9_cast_fp16")];
135
- tensor<fp16, [1, 512, 768]> input_29_cast_fp16 = add(x = linear_9_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("input_29_cast_fp16")];
136
- tensor<int32, [1]> input_31_axes_0 = const()[name = string("input_31_axes_0"), val = tensor<int32, [1]>([-1])];
137
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214873344)))];
138
- tensor<fp16, [768]> roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214874944)))];
139
- tensor<fp16, [1, 512, 768]> input_31_cast_fp16 = layer_norm(axes = input_31_axes_0, beta = roberta_encoder_layer_1_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_1_attention_output_LayerNorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")];
140
- tensor<fp16, [3072, 768]> roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214876544))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217235904))))[name = string("roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized")];
141
- tensor<fp16, [3072]> roberta_encoder_layer_1_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217383424)))];
142
- tensor<fp16, [1, 512, 3072]> linear_10_cast_fp16 = linear(bias = roberta_encoder_layer_1_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_1_intermediate_dense_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_10_cast_fp16")];
143
- string input_35_mode_0 = const()[name = string("input_35_mode_0"), val = string("EXACT")];
144
- tensor<fp16, [1, 512, 3072]> input_35_cast_fp16 = gelu(mode = input_35_mode_0, x = linear_10_cast_fp16)[name = string("input_35_cast_fp16")];
145
- tensor<fp16, [768, 3072]> roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(217389632))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219748992))))[name = string("roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized")];
146
- tensor<fp16, [768]> roberta_encoder_layer_1_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219896512)))];
147
- tensor<fp16, [1, 512, 768]> linear_11_cast_fp16 = linear(bias = roberta_encoder_layer_1_output_dense_bias_to_fp16, weight = roberta_encoder_layer_1_output_dense_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_11_cast_fp16")];
148
- tensor<fp16, [1, 512, 768]> input_39_cast_fp16 = add(x = linear_11_cast_fp16, y = input_31_cast_fp16)[name = string("input_39_cast_fp16")];
149
- tensor<int32, [1]> hidden_states_13_axes_0 = const()[name = string("hidden_states_13_axes_0"), val = tensor<int32, [1]>([-1])];
150
- tensor<fp16, [768]> roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219898112)))];
151
- tensor<fp16, [768]> roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219899712)))];
152
- tensor<fp16, [1, 512, 768]> hidden_states_13_cast_fp16 = layer_norm(axes = hidden_states_13_axes_0, beta = roberta_encoder_layer_1_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_1_output_LayerNorm_weight_to_fp16, x = input_39_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
153
- tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219901312))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220491200))))[name = string("roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized")];
154
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_query_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_query_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220528128)))];
155
- tensor<fp16, [1, 512, 768]> linear_12_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_query_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_query_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_12_cast_fp16")];
156
- tensor<int32, [4]> var_215 = const()[name = string("op_215"), val = tensor<int32, [4]>([1, -1, 12, 64])];
157
- tensor<fp16, [1, 512, 12, 64]> var_216_cast_fp16 = reshape(shape = var_215, x = linear_12_cast_fp16)[name = string("op_216_cast_fp16")];
158
- tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220529728))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221119616))))[name = string("roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized")];
159
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_key_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_key_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221156544)))];
160
- tensor<fp16, [1, 512, 768]> linear_13_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_key_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_key_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_13_cast_fp16")];
161
- tensor<int32, [4]> var_221 = const()[name = string("op_221"), val = tensor<int32, [4]>([1, -1, 12, 64])];
162
- tensor<fp16, [1, 512, 12, 64]> var_222_cast_fp16 = reshape(shape = var_221, x = linear_13_cast_fp16)[name = string("op_222_cast_fp16")];
163
- tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221158144))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221748032))))[name = string("roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized")];
164
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_self_value_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_self_value_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221784960)))];
165
- tensor<fp16, [1, 512, 768]> linear_14_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_self_value_bias_to_fp16, weight = roberta_encoder_layer_2_attention_self_value_weight_to_fp16_quantized, x = hidden_states_13_cast_fp16)[name = string("linear_14_cast_fp16")];
166
- tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, -1, 12, 64])];
167
- tensor<fp16, [1, 512, 12, 64]> var_228_cast_fp16 = reshape(shape = var_227, x = linear_14_cast_fp16)[name = string("op_228_cast_fp16")];
168
- tensor<int32, [4]> transpose_24_perm_0 = const()[name = string("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
169
- tensor<int32, [4]> transpose_25_perm_0 = const()[name = string("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
170
- tensor<int32, [4]> transpose_26_perm_0 = const()[name = string("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
171
- tensor<fp16, [1, 12, 512, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = var_228_cast_fp16)[name = string("transpose_28")];
172
- tensor<fp16, [1, 12, 512, 64]> transpose_25 = transpose(perm = transpose_25_perm_0, x = var_222_cast_fp16)[name = string("transpose_29")];
173
- tensor<fp16, [1, 12, 512, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = var_216_cast_fp16)[name = string("transpose_30")];
174
- tensor<fp16, [1, 12, 512, 64]> attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = attention_mask_cast_fp16, key = transpose_25, query = transpose_24, value = transpose_26)[name = string("attn_output_9_cast_fp16")];
175
- tensor<int32, [4]> attn_output_perm_0 = const()[name = string("attn_output_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
176
- tensor<int32, [3]> var_232 = const()[name = string("op_232"), val = tensor<int32, [3]>([1, 512, 768])];
177
- tensor<fp16, [1, 512, 12, 64]> attn_output_cast_fp16 = transpose(perm = attn_output_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_27")];
178
- tensor<fp16, [1, 512, 768]> input_41_cast_fp16 = reshape(shape = var_232, x = attn_output_cast_fp16)[name = string("input_41_cast_fp16")];
179
- tensor<fp16, [768, 768]> roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221786560))), scale = tensor<fp16, [768, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222376448))))[name = string("roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized")];
180
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222413376)))];
181
- tensor<fp16, [1, 512, 768]> linear_15_cast_fp16 = linear(bias = roberta_encoder_layer_2_attention_output_dense_bias_to_fp16, weight = roberta_encoder_layer_2_attention_output_dense_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_15_cast_fp16")];
182
- tensor<fp16, [1, 512, 768]> input_45_cast_fp16 = add(x = linear_15_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("input_45_cast_fp16")];
183
- tensor<int32, [1]> input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
184
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222414976)))];
185
- tensor<fp16, [768]> roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222416576)))];
186
- tensor<fp16, [1, 512, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, beta = roberta_encoder_layer_2_attention_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_2_attention_output_LayerNorm_weight_to_fp16, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
187
- tensor<fp16, [3072, 768]> roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [3072, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222418176))), scale = tensor<fp16, [3072, 24]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224777536))))[name = string("roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized")];
188
- tensor<fp16, [3072]> roberta_encoder_layer_2_intermediate_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_intermediate_dense_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224925056)))];
189
- tensor<fp16, [1, 512, 3072]> linear_16_cast_fp16 = linear(bias = roberta_encoder_layer_2_intermediate_dense_bias_to_fp16, weight = roberta_encoder_layer_2_intermediate_dense_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_16_cast_fp16")];
190
- string input_51_mode_0 = const()[name = string("input_51_mode_0"), val = string("EXACT")];
191
- tensor<fp16, [1, 512, 3072]> input_51_cast_fp16 = gelu(mode = input_51_mode_0, x = linear_16_cast_fp16)[name = string("input_51_cast_fp16")];
192
- tensor<fp16, [768, 3072]> roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor<int8, [768, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224931264))), scale = tensor<fp16, [768, 96]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227290624))))[name = string("roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized")];
193
- tensor<fp16, [768]> roberta_encoder_layer_2_output_dense_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_dense_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227438144)))];
194
- tensor<fp16, [1, 512, 768]> linear_17_cast_fp16 = linear(bias = roberta_encoder_layer_2_output_dense_bias_to_fp16, weight = roberta_encoder_layer_2_output_dense_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_17_cast_fp16")];
195
- tensor<fp16, [1, 512, 768]> input_55_cast_fp16 = add(x = linear_17_cast_fp16, y = input_47_cast_fp16)[name = string("input_55_cast_fp16")];
196
- tensor<int32, [1]> input_57_axes_0 = const()[name = string("input_57_axes_0"), val = tensor<int32, [1]>([-1])];
197
- tensor<fp16, [768]> roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227439744)))];
198
- tensor<fp16, [768]> roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16 = const()[name = string("roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227441344)))];
199
- tensor<fp16, [1, 512, 768]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, beta = roberta_encoder_layer_2_output_LayerNorm_bias_to_fp16, epsilon = var_20_to_fp16, gamma = roberta_encoder_layer_2_output_LayerNorm_weight_to_fp16, x = input_55_cast_fp16)[name = string("input_57_cast_fp16")];
200
- tensor<fp16, [1, 768]> classifier_weight_to_fp16 = const()[name = string("classifier_weight_to_fp16"), val = tensor<fp16, [1, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227442944)))];
201
- tensor<fp16, [1]> classifier_bias_to_fp16 = const()[name = string("classifier_bias_to_fp16"), val = tensor<fp16, [1]>([0x1.678p-12])];
202
- tensor<fp16, [1, 512, 1]> output = linear(bias = classifier_bias_to_fp16, weight = classifier_weight_to_fp16, x = input_57_cast_fp16)[name = string("linear_18_cast_fp16")];
203
- } -> (output);
204
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
weights/weight.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0395a363b84b8e638bc43ad40464023376e8efa4efdcae8389a1bffcf6b0c50
3
- size 227444544