program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor mask, tensor tokens) { tensor x_1_batch_dims_0 = const()[name = tensor("x_1_batch_dims_0"), val = tensor(0)]; tensor x_1_validate_indices_0 = const()[name = tensor("x_1_validate_indices_0"), val = tensor(false)]; tensor text_embedding_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("text_embedding_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1816576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1814144)))]; tensor tokens_to_int16_dtype_0 = const()[name = tensor("tokens_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_14_dtype_0 = const()[name = tensor("cast_14_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0 = const()[name = tensor("greater_equal_0_y_0"), val = tensor(0)]; tensor tokens_to_int16 = cast(dtype = tokens_to_int16_dtype_0, x = tokens)[name = tensor("cast_6")]; tensor cast_14 = cast(dtype = cast_14_dtype_0, x = tokens_to_int16)[name = tensor("cast_5")]; tensor greater_equal_0 = greater_equal(x = cast_14, y = greater_equal_0_y_0)[name = tensor("greater_equal_0")]; tensor slice_by_index_0 = const()[name = tensor("slice_by_index_0"), val = tensor(2362)]; tensor add_0 = add(x = cast_14, y = slice_by_index_0)[name = tensor("add_0")]; tensor select_0 = select(a = cast_14, b = add_0, cond = greater_equal_0)[name = tensor("select_0")]; tensor select_0_to_int16_dtype_0 = const()[name = tensor("select_0_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_0_dtype_0 = const()[name = tensor("cast_0_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0_1 = const()[name = tensor("greater_equal_0_y_0_1"), val = tensor(0)]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor("cast_4")]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor("cast_3")]; tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor("greater_equal_0_1")]; tensor slice_by_index_0_1 = const()[name = tensor("slice_by_index_0_1"), val = tensor(2362)]; tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor("add_0_1")]; tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor("select_0_1")]; tensor x_1_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor("x_1_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor(0)]; tensor x_1_cast_fp16_cast_uint16_cast_uint16 = gather(axis = x_1_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = x_1_batch_dims_0, indices = select_0_1, validate_indices = x_1_validate_indices_0, x = text_embedding_weight_to_fp16_quantized)[name = tensor("x_1_cast_fp16_cast_uint16_cast_uint16")]; tensor var_26 = const()[name = tensor("op_26"), val = tensor(-1)]; tensor op_50_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("op_50_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1821376))), scale = tensor(0x1.544p-6), zero_point = tensor(0)]; tensor input_5_cast_fp16 = add(x = x_1_cast_fp16_cast_uint16_cast_uint16, y = op_50_to_fp16_quantized)[name = tensor("input_5_cast_fp16")]; tensor var_57_axes_0 = const()[name = tensor("op_57_axes_0"), val = tensor([-1])]; tensor mask_to_fp16_dtype_0 = const()[name = tensor("mask_to_fp16_dtype_0"), val = tensor("fp16")]; tensor mask_to_fp16 = cast(dtype = mask_to_fp16_dtype_0, x = mask)[name = tensor("cast_2")]; tensor var_57_cast_fp16 = expand_dims(axes = var_57_axes_0, x = mask_to_fp16)[name = tensor("op_57_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = input_5_cast_fp16, y = var_57_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor query_1_axes_0 = const()[name = tensor("query_1_axes_0"), val = tensor([-1])]; tensor encoder_layers_0_norm_self_weight_to_fp16 = const()[name = tensor("encoder_layers_0_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2018048)))]; tensor var_15_to_fp16 = const()[name = tensor("op_15_to_fp16"), val = tensor(0x1.5p-17)]; tensor query_1_cast_fp16 = layer_norm(axes = query_1_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_0_norm_self_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("query_1_cast_fp16")]; tensor encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2019648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3791552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3789184)))]; tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3796224)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = encoder_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = query_1_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor var_69 = const()[name = tensor("op_69"), val = tensor([1, 256, 3, 12, 64])]; tensor qkv_1_cast_fp16 = reshape(shape = var_69, x = linear_0_cast_fp16)[name = tensor("qkv_1_cast_fp16")]; tensor var_71_split_sizes_0 = const()[name = tensor("op_71_split_sizes_0"), val = tensor([1, 1, 1])]; tensor var_71_axis_0 = const()[name = tensor("op_71_axis_0"), val = tensor(2)]; tensor var_71_cast_fp16_0, tensor var_71_cast_fp16_1, tensor var_71_cast_fp16_2 = split(axis = var_71_axis_0, split_sizes = var_71_split_sizes_0, x = qkv_1_cast_fp16)[name = tensor("op_71_cast_fp16")]; tensor q_3_axes_0 = const()[name = tensor("q_3_axes_0"), val = tensor([2])]; tensor q_3_cast_fp16 = squeeze(axes = q_3_axes_0, x = var_71_cast_fp16_0)[name = tensor("q_3_cast_fp16")]; tensor k_3_axes_0 = const()[name = tensor("k_3_axes_0"), val = tensor([2])]; tensor k_3_cast_fp16 = squeeze(axes = k_3_axes_0, x = var_71_cast_fp16_1)[name = tensor("k_3_cast_fp16")]; tensor v_3_axes_0 = const()[name = tensor("v_3_axes_0"), val = tensor([2])]; tensor v_3_cast_fp16 = squeeze(axes = v_3_axes_0, x = var_71_cast_fp16_2)[name = tensor("v_3_cast_fp16")]; tensor var_77_axes_0 = const()[name = tensor("op_77_axes_0"), val = tensor([1])]; tensor var_77_cast_fp16 = expand_dims(axes = var_77_axes_0, x = mask_to_fp16)[name = tensor("op_77_cast_fp16")]; tensor var_78_axes_0 = const()[name = tensor("op_78_axes_0"), val = tensor([2])]; tensor var_78_cast_fp16 = expand_dims(axes = var_78_axes_0, x = mask_to_fp16)[name = tensor("op_78_cast_fp16")]; tensor mask_3_cast_fp16 = mul(x = var_77_cast_fp16, y = var_78_cast_fp16)[name = tensor("mask_3_cast_fp16")]; tensor mask_5_axes_0 = const()[name = tensor("mask_5_axes_0"), val = tensor([1])]; tensor mask_5_cast_fp16 = expand_dims(axes = mask_5_axes_0, x = mask_3_cast_fp16)[name = tensor("mask_5_cast_fp16")]; tensor v_5_perm_0 = const()[name = tensor("v_5_perm_0"), val = tensor([0, 2, -3, -1])]; tensor var_87_transpose_x_0 = const()[name = tensor("op_87_transpose_x_0"), val = tensor(false)]; tensor var_87_transpose_y_0 = const()[name = tensor("op_87_transpose_y_0"), val = tensor(false)]; tensor transpose_24_perm_0 = const()[name = tensor("transpose_24_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_25_perm_0 = const()[name = tensor("transpose_25_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_25 = transpose(perm = transpose_25_perm_0, x = k_3_cast_fp16)[name = tensor("transpose_70")]; tensor transpose_24 = transpose(perm = transpose_24_perm_0, x = q_3_cast_fp16)[name = tensor("transpose_71")]; tensor var_87_cast_fp16 = matmul(transpose_x = var_87_transpose_x_0, transpose_y = var_87_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor("op_87_cast_fp16")]; tensor var_88_to_fp16 = const()[name = tensor("op_88_to_fp16"), val = tensor(0x1p-3)]; tensor attn_score_1_cast_fp16 = mul(x = var_87_cast_fp16, y = var_88_to_fp16)[name = tensor("attn_score_1_cast_fp16")]; tensor var_27_promoted_to_fp16 = const()[name = tensor("op_27_promoted_to_fp16"), val = tensor(0x0p+0)]; tensor var_90_cast_fp16 = equal(x = mask_5_cast_fp16, y = var_27_promoted_to_fp16)[name = tensor("op_90_cast_fp16")]; tensor var_18_to_fp16 = const()[name = tensor("op_18_to_fp16"), val = tensor(-inf)]; tensor attn_score_3_cast_fp16 = select(a = var_18_to_fp16, b = attn_score_1_cast_fp16, cond = var_90_cast_fp16)[name = tensor("attn_score_3_cast_fp16")]; tensor input_9_cast_fp16_x_0 = const()[name = tensor("input_9_cast_fp16_x_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3800896)))]; tensor input_9_cast_fp16 = add(x = input_9_cast_fp16_x_0, y = attn_score_3_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor attn_prob_1_cast_fp16 = softmax(axis = var_26, x = input_9_cast_fp16)[name = tensor("attn_prob_1_cast_fp16")]; tensor var_17_to_fp16 = const()[name = tensor("op_17_to_fp16"), val = tensor(0x0p+0)]; tensor input_11_cast_fp16 = select(a = var_17_to_fp16, b = attn_prob_1_cast_fp16, cond = var_90_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor y_1_transpose_x_0 = const()[name = tensor("y_1_transpose_x_0"), val = tensor(false)]; tensor y_1_transpose_y_0 = const()[name = tensor("y_1_transpose_y_0"), val = tensor(false)]; tensor v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = v_3_cast_fp16)[name = tensor("transpose_69")]; tensor y_1_cast_fp16 = matmul(transpose_x = y_1_transpose_x_0, transpose_y = y_1_transpose_y_0, x = input_11_cast_fp16, y = v_5_cast_fp16)[name = tensor("y_1_cast_fp16")]; tensor var_101_perm_0 = const()[name = tensor("op_101_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 256, -1])]; tensor var_101_cast_fp16 = transpose(perm = var_101_perm_0, x = y_1_cast_fp16)[name = tensor("transpose_68")]; tensor input_13_cast_fp16 = reshape(shape = var_103, x = var_101_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3932032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4522752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4521920)))]; tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4524352)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = encoder_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor input_17_cast_fp16 = add(x = input_7_cast_fp16, y = linear_1_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor x_5_axes_0 = const()[name = tensor("x_5_axes_0"), val = tensor([-1])]; tensor encoder_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor("encoder_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4525952)))]; tensor x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_0_norm_pos_ff_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("x_5_cast_fp16")]; tensor signal_1_perm_0 = const()[name = tensor("signal_1_perm_0"), val = tensor([0, 2, 1])]; tensor signal_1_cast_fp16 = transpose(perm = signal_1_perm_0, x = x_5_cast_fp16)[name = tensor("transpose_67")]; tensor input_19_cast_fp16 = mul(x = signal_1_cast_fp16, y = var_77_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0, 2, 0])]; tensor input_21_mode_0 = const()[name = tensor("input_21_mode_0"), val = tensor("constant")]; tensor const_7_to_fp16 = const()[name = tensor("const_7_to_fp16"), val = tensor(0x0p+0)]; tensor input_21_cast_fp16 = pad(constant_val = const_7_to_fp16, mode = input_21_mode_0, pad = input_21_pad_0, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor conv_signal_1_pad_type_0 = const()[name = tensor("conv_signal_1_pad_type_0"), val = tensor("valid")]; tensor conv_signal_1_strides_0 = const()[name = tensor("conv_signal_1_strides_0"), val = tensor([1])]; tensor conv_signal_1_pad_0 = const()[name = tensor("conv_signal_1_pad_0"), val = tensor([0, 0])]; tensor conv_signal_1_dilations_0 = const()[name = tensor("conv_signal_1_dilations_0"), val = tensor([1])]; tensor conv_signal_1_groups_0 = const()[name = tensor("conv_signal_1_groups_0"), val = tensor(1)]; tensor encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4527552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11608640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11605504)))]; tensor conv_signal_1_cast_fp16 = conv(dilations = conv_signal_1_dilations_0, groups = conv_signal_1_groups_0, pad = conv_signal_1_pad_0, pad_type = conv_signal_1_pad_type_0, strides = conv_signal_1_strides_0, weight = encoder_layers_0_pos_ff_proj_conv_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor("conv_signal_1_cast_fp16")]; tensor input_23_cast_fp16 = mul(x = conv_signal_1_cast_fp16, y = var_77_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor signal_3_mode_0 = const()[name = tensor("signal_3_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor signal_3_cast_fp16 = gelu(mode = signal_3_mode_0, x = input_23_cast_fp16)[name = tensor("signal_3_cast_fp16")]; tensor input_25_cast_fp16 = mul(x = signal_3_cast_fp16, y = var_77_cast_fp16)[name = tensor("input_25_cast_fp16")]; tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0, 0, 0, 2, 0])]; tensor input_27_mode_0 = const()[name = tensor("input_27_mode_0"), val = tensor("constant")]; tensor const_8_to_fp16 = const()[name = tensor("const_8_to_fp16"), val = tensor(0x0p+0)]; tensor input_27_cast_fp16 = pad(constant_val = const_8_to_fp16, mode = input_27_mode_0, pad = input_27_pad_0, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor conv_signal_3_pad_type_0 = const()[name = tensor("conv_signal_3_pad_type_0"), val = tensor("valid")]; tensor conv_signal_3_strides_0 = const()[name = tensor("conv_signal_3_strides_0"), val = tensor([1])]; tensor conv_signal_3_pad_0 = const()[name = tensor("conv_signal_3_pad_0"), val = tensor([0, 0])]; tensor conv_signal_3_dilations_0 = const()[name = tensor("conv_signal_3_dilations_0"), val = tensor([1])]; tensor conv_signal_3_groups_0 = const()[name = tensor("conv_signal_3_groups_0"), val = tensor(1)]; tensor encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11614848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18692800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4521920)))]; tensor conv_signal_3_cast_fp16 = conv(dilations = conv_signal_3_dilations_0, groups = conv_signal_3_groups_0, pad = conv_signal_3_pad_0, pad_type = conv_signal_3_pad_type_0, strides = conv_signal_3_strides_0, weight = encoder_layers_0_pos_ff_o_net_conv_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor("conv_signal_3_cast_fp16")]; tensor var_141_cast_fp16 = mul(x = conv_signal_3_cast_fp16, y = var_77_cast_fp16)[name = tensor("op_141_cast_fp16")]; tensor input_29_perm_0 = const()[name = tensor("input_29_perm_0"), val = tensor([0, 2, 1])]; tensor input_29_cast_fp16 = transpose(perm = input_29_perm_0, x = var_141_cast_fp16)[name = tensor("transpose_66")]; tensor x_7_cast_fp16 = add(x = input_17_cast_fp16, y = input_29_cast_fp16)[name = tensor("x_7_cast_fp16")]; tensor x_9_cast_fp16 = mul(x = x_7_cast_fp16, y = var_57_cast_fp16)[name = tensor("x_9_cast_fp16")]; tensor input_31_cast_fp16 = mul(x = x_9_cast_fp16, y = var_57_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor query_3_axes_0 = const()[name = tensor("query_3_axes_0"), val = tensor([-1])]; tensor encoder_layers_1_norm_self_weight_to_fp16 = const()[name = tensor("encoder_layers_1_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18694400)))]; tensor query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, epsilon = var_15_to_fp16, gamma = encoder_layers_1_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("query_3_cast_fp16")]; tensor encoder_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("encoder_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor