program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor audio_emb, tensor encoder_mask, tensor encoder_output, tensor position, state> sa_k_0, state> sa_k_1, state> sa_k_10, state> sa_k_11, state> sa_k_2, state> sa_k_3, state> sa_k_4, state> sa_k_5, state> sa_k_6, state> sa_k_7, state> sa_k_8, state> sa_k_9, state> sa_v_0, state> sa_v_1, state> sa_v_10, state> sa_v_11, state> sa_v_2, state> sa_v_3, state> sa_v_4, state> sa_v_5, state> sa_v_6, state> sa_v_7, state> sa_v_8, state> sa_v_9, tensor xa_k_0, tensor xa_k_1, tensor xa_k_10, tensor xa_k_11, tensor xa_k_2, tensor xa_k_3, tensor xa_k_4, tensor xa_k_5, tensor xa_k_6, tensor xa_k_7, tensor xa_k_8, tensor xa_k_9, tensor xa_v_0, tensor xa_v_1, tensor xa_v_10, tensor xa_v_11, tensor xa_v_2, tensor xa_v_3, tensor xa_v_4, tensor xa_v_5, tensor xa_v_6, tensor xa_v_7, tensor xa_v_8, tensor xa_v_9) { int32 var_502_batch_dims_0 = const()[name = string("op_502_batch_dims_0"), val = int32(0)]; bool var_502_validate_indices_0 = const()[name = string("op_502_validate_indices_0"), val = bool(false)]; tensor dec_position_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("dec_position_embeddings_weight_to_fp16_quantized")]; string position_to_int16_dtype_0 = const()[name = string("position_to_int16_dtype_0"), val = string("int16")]; string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("int32")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = string("cast_35")]; tensor cast_111 = cast(dtype = cast_111_dtype_0, x = position_to_int16)[name = string("cast_34")]; tensor greater_equal_0 = greater_equal(x = cast_111, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(2048)]; tensor add_0 = add(x = cast_111, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = cast_111, b = add_0, cond = greater_equal_0)[name = string("select_0")]; string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_33")]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_32")]; tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(2048)]; tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; int32 op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)]; tensor op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = string("op_502_cast_fp16_cast_uint16_cast_uint16")]; string audio_emb_to_fp16_dtype_0 = const()[name = string("audio_emb_to_fp16_dtype_0"), val = string("fp16")]; tensor audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = string("cast_31")]; tensor input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = string("input_3_cast_fp16")]; tensor idx_range_promoted_to_fp16 = const()[name = string("idx_range_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1577152)))]; string var_515_to_fp16_dtype_0 = const()[name = string("op_515_to_fp16_dtype_0"), val = string("fp16")]; tensor position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = string("cast_30")]; tensor var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_516_cast_fp16")]; tensor sa_key_mask_axes_0 = const()[name = string("sa_key_mask_axes_0"), val = tensor([0])]; string sa_key_mask_1_to_fp16_dtype_0 = const()[name = string("sa_key_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = string("cast_29")]; tensor sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = string("sa_key_mask_cast_fp16")]; tensor input_5_axes_0 = const()[name = string("input_5_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_self_weight_to_fp16 = const()[name = string("dec_layers_0_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578432)))]; fp16 var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")]; tensor dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1580032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3349568))))[name = string("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3354240)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_539 = const()[name = string("op_539"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = string("qkv_3_cast_fp16")]; tensor q_1_begin_0 = const()[name = string("q_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_1_end_0 = const()[name = string("q_1_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_1_end_mask_0 = const()[name = string("q_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_1_squeeze_mask_0 = const()[name = string("q_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("q_1_cast_fp16")]; tensor new_k_1_begin_0 = const()[name = string("new_k_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_1_end_0 = const()[name = string("new_k_1_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_1_end_mask_0 = const()[name = string("new_k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_1_squeeze_mask_0 = const()[name = string("new_k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_k_1_cast_fp16")]; tensor new_v_1_begin_0 = const()[name = string("new_v_1_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_1_end_0 = const()[name = string("new_v_1_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_1_end_mask_0 = const()[name = string("new_v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_1_squeeze_mask_0 = const()[name = string("new_v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_v_1_cast_fp16")]; tensor var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_585_cast_fp16")]; string write_oh_1_dtype_0 = const()[name = string("write_oh_1_dtype_0"), val = string("fp16")]; tensor var_595 = const()[name = string("op_595"), val = tensor([1, 600, 1, 1])]; tensor write_oh_1 = cast(dtype = write_oh_1_dtype_0, x = var_585_cast_fp16)[name = string("cast_28")]; tensor write_oh_b_1 = reshape(shape = var_595, x = write_oh_1)[name = string("write_oh_b_1")]; tensor read_state_0 = read_state(input = sa_k_0)[name = string("read_state_0")]; tensor var_608_cast_fp16 = sub(x = new_k_1_cast_fp16, y = read_state_0)[name = string("op_608_cast_fp16")]; tensor var_609_cast_fp16 = mul(x = var_608_cast_fp16, y = write_oh_b_1)[name = string("op_609_cast_fp16")]; tensor sa_k_buf_3_cast_fp16 = add(x = read_state_0, y = var_609_cast_fp16)[name = string("sa_k_buf_3_cast_fp16")]; write_state(data = sa_k_buf_3_cast_fp16, input = sa_k_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = sa_k_0)[name = string("coreml_update_state_24")]; tensor read_state_1 = read_state(input = sa_v_0)[name = string("read_state_1")]; tensor var_613_cast_fp16 = sub(x = new_v_1_cast_fp16, y = read_state_1)[name = string("op_613_cast_fp16")]; tensor var_614_cast_fp16 = mul(x = var_613_cast_fp16, y = write_oh_b_1)[name = string("op_614_cast_fp16")]; tensor sa_v_buf_3_cast_fp16 = add(x = read_state_1, y = var_614_cast_fp16)[name = string("sa_v_buf_3_cast_fp16")]; write_state(data = sa_v_buf_3_cast_fp16, input = sa_v_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = sa_v_0)[name = string("coreml_update_state_25")]; tensor var_643 = const()[name = string("op_643"), val = tensor([0, 2, -3, -1])]; bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)]; bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)]; tensor transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = coreml_update_state_24)[name = string("transpose_262")]; tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = string("transpose_263")]; tensor var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = transpose_96, y = transpose_97)[name = string("op_645_cast_fp16")]; fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1p-3)]; tensor scores_1_cast_fp16 = mul(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("scores_1_cast_fp16")]; tensor var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor([1])]; tensor var_654_cast_fp16 = expand_dims(axes = var_654_axes_0, x = sa_key_mask_cast_fp16)[name = string("op_654_cast_fp16")]; tensor var_656_axes_0 = const()[name = string("op_656_axes_0"), val = tensor([2])]; tensor var_656_cast_fp16 = expand_dims(axes = var_656_axes_0, x = var_654_cast_fp16)[name = string("op_656_cast_fp16")]; fp16 var_662_promoted_to_fp16 = const()[name = string("op_662_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_663_cast_fp16 = equal(x = var_656_cast_fp16, y = var_662_promoted_to_fp16)[name = string("op_663_cast_fp16")]; fp16 var_664_to_fp16 = const()[name = string("op_664_to_fp16"), val = fp16(-inf)]; tensor scores_3_cast_fp16 = select(a = var_664_to_fp16, b = scores_1_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_3_cast_fp16")]; int32 var_666 = const()[name = string("op_666"), val = int32(-1)]; tensor probs_1_cast_fp16 = softmax(axis = var_666, x = scores_3_cast_fp16)[name = string("probs_1_cast_fp16")]; bool var_669_transpose_x_0 = const()[name = string("op_669_transpose_x_0"), val = bool(false)]; bool var_669_transpose_y_0 = const()[name = string("op_669_transpose_y_0"), val = bool(false)]; tensor v_t_1_cast_fp16 = transpose(perm = var_643, x = coreml_update_state_25)[name = string("transpose_261")]; tensor var_669_cast_fp16 = matmul(transpose_x = var_669_transpose_x_0, transpose_y = var_669_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = string("op_669_cast_fp16")]; tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1, 3])]; tensor var_679 = const()[name = string("op_679"), val = tensor([1, 1, -1])]; tensor var_675_cast_fp16 = transpose(perm = var_674, x = var_669_cast_fp16)[name = string("transpose_260")]; tensor input_7_cast_fp16 = reshape(shape = var_679, x = var_675_cast_fp16)[name = string("input_7_cast_fp16")]; tensor dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3358912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3948800))))[name = string("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3950400)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = string("input_9_cast_fp16")]; tensor input_11_axes_0 = const()[name = string("input_11_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3952000)))]; fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_687_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = string("input_11_cast_fp16")]; tensor dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3953600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4051968))))[name = string("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_2_bias_0_to_fp16 = const()[name = string("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052288)))]; tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_700 = const()[name = string("op_700"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_1_cast_fp16 = reshape(shape = var_700, x = linear_2_cast_fp16)[name = string("xq_proj_1_cast_fp16")]; tensor var_718 = const()[name = string("op_718"), val = tensor([0, 2, -3, -1])]; string xa_v_0_to_fp16_dtype_0 = const()[name = string("xa_v_0_to_fp16_dtype_0"), val = string("fp16")]; bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)]; bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)]; string xa_k_0_to_fp16_dtype_0 = const()[name = string("xa_k_0_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = string("cast_27")]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = string("transpose_258")]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = string("transpose_259")]; tensor var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = transpose_98, y = transpose_99)[name = string("op_720_cast_fp16")]; fp16 var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_1_cast_fp16 = mul(x = var_720_cast_fp16, y = var_721_to_fp16)[name = string("xscores_1_cast_fp16")]; tensor var_729_axes_0 = const()[name = string("op_729_axes_0"), val = tensor([1])]; string encoder_mask_to_fp16_dtype_0 = const()[name = string("encoder_mask_to_fp16_dtype_0"), val = string("fp16")]; tensor encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = string("cast_26")]; tensor var_729_cast_fp16 = expand_dims(axes = var_729_axes_0, x = encoder_mask_to_fp16)[name = string("op_729_cast_fp16")]; tensor var_731_axes_0 = const()[name = string("op_731_axes_0"), val = tensor([2])]; tensor var_731_cast_fp16 = expand_dims(axes = var_731_axes_0, x = var_729_cast_fp16)[name = string("op_731_cast_fp16")]; fp16 var_737_promoted_to_fp16 = const()[name = string("op_737_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_738_cast_fp16 = equal(x = var_731_cast_fp16, y = var_737_promoted_to_fp16)[name = string("op_738_cast_fp16")]; fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(-inf)]; tensor xscores_3_cast_fp16 = select(a = var_739_to_fp16, b = xscores_1_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_3_cast_fp16")]; int32 var_741 = const()[name = string("op_741"), val = int32(-1)]; tensor xprobs_1_cast_fp16 = softmax(axis = var_741, x = xscores_3_cast_fp16)[name = string("xprobs_1_cast_fp16")]; bool var_744_transpose_x_0 = const()[name = string("op_744_transpose_x_0"), val = bool(false)]; bool var_744_transpose_y_0 = const()[name = string("op_744_transpose_y_0"), val = bool(false)]; tensor xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = string("cast_25")]; tensor xvT_1_cast_fp16 = transpose(perm = var_718, x = xa_v_0_to_fp16)[name = string("transpose_257")]; tensor var_744_cast_fp16 = matmul(transpose_x = var_744_transpose_x_0, transpose_y = var_744_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = string("op_744_cast_fp16")]; tensor var_749 = const()[name = string("op_749"), val = tensor([0, 2, 1, 3])]; tensor var_754 = const()[name = string("op_754"), val = tensor([1, 1, -1])]; tensor var_750_cast_fp16 = transpose(perm = var_749, x = var_744_cast_fp16)[name = string("transpose_256")]; tensor input_13_cast_fp16 = reshape(shape = var_754, x = var_750_cast_fp16)[name = string("input_13_cast_fp16")]; tensor dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4150976))))[name = string("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = string("input_15_cast_fp16")]; tensor x_1_axes_0 = const()[name = string("x_1_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4152576)))]; fp16 var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_762_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = string("x_1_cast_fp16")]; tensor var_778 = const()[name = string("op_778"), val = tensor([0, 2, 1])]; string y_1_pad_type_0 = const()[name = string("y_1_pad_type_0"), val = string("valid")]; tensor y_1_strides_0 = const()[name = string("y_1_strides_0"), val = tensor([1])]; tensor y_1_pad_0 = const()[name = string("y_1_pad_0"), val = tensor([0, 0])]; tensor y_1_dilations_0 = const()[name = string("y_1_dilations_0"), val = tensor([1])]; int32 y_1_groups_0 = const()[name = string("y_1_groups_0"), val = int32(1)]; tensor dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4154176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6513536))))[name = string("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_3_cast_fp16 = transpose(perm = var_778, x = x_1_cast_fp16)[name = string("transpose_255")]; tensor y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = string("y_1_cast_fp16")]; string x_5_mode_0 = const()[name = string("x_5_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = string("x_5_cast_fp16")]; string y_3_pad_type_0 = const()[name = string("y_3_pad_type_0"), val = string("valid")]; tensor y_3_strides_0 = const()[name = string("y_3_strides_0"), val = tensor([1])]; tensor y_3_pad_0 = const()[name = string("y_3_pad_0"), val = tensor([0, 0])]; tensor y_3_dilations_0 = const()[name = string("y_3_dilations_0"), val = tensor([1])]; int32 y_3_groups_0 = const()[name = string("y_3_groups_0"), val = int32(1)]; tensor dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6519744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8879104))))[name = string("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = string("y_3_cast_fp16")]; tensor var_796 = const()[name = string("op_796"), val = tensor([0, 2, 1])]; tensor var_797_cast_fp16 = transpose(perm = var_796, x = y_3_cast_fp16)[name = string("transpose_254")]; tensor input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_797_cast_fp16)[name = string("input_17_cast_fp16")]; tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_self_weight_to_fp16 = const()[name = string("dec_layers_1_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8880704)))]; fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_801_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")]; tensor dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8882304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10651840))))[name = string("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_815 = const()[name = string("op_815"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_7_cast_fp16 = reshape(shape = var_815, x = linear_4_cast_fp16)[name = string("qkv_7_cast_fp16")]; tensor q_3_begin_0 = const()[name = string("q_3_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_3_end_0 = const()[name = string("q_3_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_3_end_mask_0 = const()[name = string("q_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_3_squeeze_mask_0 = const()[name = string("q_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("q_3_cast_fp16")]; tensor new_k_3_begin_0 = const()[name = string("new_k_3_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_3_end_0 = const()[name = string("new_k_3_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_3_end_mask_0 = const()[name = string("new_k_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_3_squeeze_mask_0 = const()[name = string("new_k_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_k_3_cast_fp16")]; tensor new_v_3_begin_0 = const()[name = string("new_v_3_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_3_end_0 = const()[name = string("new_v_3_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_3_end_mask_0 = const()[name = string("new_v_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_3_squeeze_mask_0 = const()[name = string("new_v_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_v_3_cast_fp16")]; tensor read_state_2 = read_state(input = sa_k_1)[name = string("read_state_2")]; tensor var_884_cast_fp16 = sub(x = new_k_3_cast_fp16, y = read_state_2)[name = string("op_884_cast_fp16")]; tensor var_885_cast_fp16 = mul(x = var_884_cast_fp16, y = write_oh_b_1)[name = string("op_885_cast_fp16")]; tensor sa_k_buf_7_cast_fp16 = add(x = read_state_2, y = var_885_cast_fp16)[name = string("sa_k_buf_7_cast_fp16")]; write_state(data = sa_k_buf_7_cast_fp16, input = sa_k_1)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = sa_k_1)[name = string("coreml_update_state_26")]; tensor read_state_3 = read_state(input = sa_v_1)[name = string("read_state_3")]; tensor var_889_cast_fp16 = sub(x = new_v_3_cast_fp16, y = read_state_3)[name = string("op_889_cast_fp16")]; tensor var_890_cast_fp16 = mul(x = var_889_cast_fp16, y = write_oh_b_1)[name = string("op_890_cast_fp16")]; tensor sa_v_buf_7_cast_fp16 = add(x = read_state_3, y = var_890_cast_fp16)[name = string("sa_v_buf_7_cast_fp16")]; write_state(data = sa_v_buf_7_cast_fp16, input = sa_v_1)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = sa_v_1)[name = string("coreml_update_state_27")]; tensor var_919 = const()[name = string("op_919"), val = tensor([0, 2, -3, -1])]; bool var_921_transpose_x_0 = const()[name = string("op_921_transpose_x_0"), val = bool(false)]; bool var_921_transpose_y_0 = const()[name = string("op_921_transpose_y_0"), val = bool(false)]; tensor transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = coreml_update_state_26)[name = string("transpose_252")]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = string("transpose_253")]; tensor var_921_cast_fp16 = matmul(transpose_x = var_921_transpose_x_0, transpose_y = var_921_transpose_y_0, x = transpose_100, y = transpose_101)[name = string("op_921_cast_fp16")]; fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(0x1p-3)]; tensor scores_5_cast_fp16 = mul(x = var_921_cast_fp16, y = var_922_to_fp16)[name = string("scores_5_cast_fp16")]; fp16 var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = fp16(-inf)]; tensor scores_7_cast_fp16 = select(a = var_940_to_fp16, b = scores_5_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_7_cast_fp16")]; int32 var_942 = const()[name = string("op_942"), val = int32(-1)]; tensor probs_3_cast_fp16 = softmax(axis = var_942, x = scores_7_cast_fp16)[name = string("probs_3_cast_fp16")]; bool var_945_transpose_x_0 = const()[name = string("op_945_transpose_x_0"), val = bool(false)]; bool var_945_transpose_y_0 = const()[name = string("op_945_transpose_y_0"), val = bool(false)]; tensor v_t_3_cast_fp16 = transpose(perm = var_919, x = coreml_update_state_27)[name = string("transpose_251")]; tensor var_945_cast_fp16 = matmul(transpose_x = var_945_transpose_x_0, transpose_y = var_945_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = string("op_945_cast_fp16")]; tensor var_950 = const()[name = string("op_950"), val = tensor([0, 2, 1, 3])]; tensor var_955 = const()[name = string("op_955"), val = tensor([1, 1, -1])]; tensor var_951_cast_fp16 = transpose(perm = var_950, x = var_945_cast_fp16)[name = string("transpose_250")]; tensor input_21_cast_fp16 = reshape(shape = var_955, x = var_951_cast_fp16)[name = string("input_21_cast_fp16")]; tensor dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10656512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11246400))))[name = string("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = string("input_23_cast_fp16")]; tensor input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11248000)))]; fp16 var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_963_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")]; tensor dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11249600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11347968))))[name = string("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor var_976 = const()[name = string("op_976"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_3_cast_fp16 = reshape(shape = var_976, x = linear_6_cast_fp16)[name = string("xq_proj_3_cast_fp16")]; tensor var_994 = const()[name = string("op_994"), val = tensor([0, 2, -3, -1])]; string xa_v_1_to_fp16_dtype_0 = const()[name = string("xa_v_1_to_fp16_dtype_0"), val = string("fp16")]; bool var_996_transpose_x_0 = const()[name = string("op_996_transpose_x_0"), val = bool(false)]; bool var_996_transpose_y_0 = const()[name = string("op_996_transpose_y_0"), val = bool(false)]; string xa_k_1_to_fp16_dtype_0 = const()[name = string("xa_k_1_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = string("cast_24")]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = string("transpose_248")]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = string("transpose_249")]; tensor var_996_cast_fp16 = matmul(transpose_x = var_996_transpose_x_0, transpose_y = var_996_transpose_y_0, x = transpose_102, y = transpose_103)[name = string("op_996_cast_fp16")]; fp16 var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_5_cast_fp16 = mul(x = var_996_cast_fp16, y = var_997_to_fp16)[name = string("xscores_5_cast_fp16")]; fp16 var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = fp16(-inf)]; tensor xscores_7_cast_fp16 = select(a = var_1015_to_fp16, b = xscores_5_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_7_cast_fp16")]; int32 var_1017 = const()[name = string("op_1017"), val = int32(-1)]; tensor xprobs_3_cast_fp16 = softmax(axis = var_1017, x = xscores_7_cast_fp16)[name = string("xprobs_3_cast_fp16")]; bool var_1020_transpose_x_0 = const()[name = string("op_1020_transpose_x_0"), val = bool(false)]; bool var_1020_transpose_y_0 = const()[name = string("op_1020_transpose_y_0"), val = bool(false)]; tensor xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = string("cast_23")]; tensor xvT_3_cast_fp16 = transpose(perm = var_994, x = xa_v_1_to_fp16)[name = string("transpose_247")]; tensor var_1020_cast_fp16 = matmul(transpose_x = var_1020_transpose_x_0, transpose_y = var_1020_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1025 = const()[name = string("op_1025"), val = tensor([0, 2, 1, 3])]; tensor var_1030 = const()[name = string("op_1030"), val = tensor([1, 1, -1])]; tensor var_1026_cast_fp16 = transpose(perm = var_1025, x = var_1020_cast_fp16)[name = string("transpose_246")]; tensor input_27_cast_fp16 = reshape(shape = var_1030, x = var_1026_cast_fp16)[name = string("input_27_cast_fp16")]; tensor dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11348288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11446656))))[name = string("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = string("input_29_cast_fp16")]; tensor x_9_axes_0 = const()[name = string("x_9_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11448256)))]; fp16 var_1038_to_fp16 = const()[name = string("op_1038_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1038_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1054 = const()[name = string("op_1054"), val = tensor([0, 2, 1])]; string y_5_pad_type_0 = const()[name = string("y_5_pad_type_0"), val = string("valid")]; tensor y_5_strides_0 = const()[name = string("y_5_strides_0"), val = tensor([1])]; tensor y_5_pad_0 = const()[name = string("y_5_pad_0"), val = tensor([0, 0])]; tensor y_5_dilations_0 = const()[name = string("y_5_dilations_0"), val = tensor([1])]; int32 y_5_groups_0 = const()[name = string("y_5_groups_0"), val = int32(1)]; tensor dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11449856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13809216))))[name = string("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_11_cast_fp16 = transpose(perm = var_1054, x = x_9_cast_fp16)[name = string("transpose_245")]; tensor y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = string("y_5_cast_fp16")]; string x_13_mode_0 = const()[name = string("x_13_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = string("x_13_cast_fp16")]; string y_7_pad_type_0 = const()[name = string("y_7_pad_type_0"), val = string("valid")]; tensor y_7_strides_0 = const()[name = string("y_7_strides_0"), val = tensor([1])]; tensor y_7_pad_0 = const()[name = string("y_7_pad_0"), val = tensor([0, 0])]; tensor y_7_dilations_0 = const()[name = string("y_7_dilations_0"), val = tensor([1])]; int32 y_7_groups_0 = const()[name = string("y_7_groups_0"), val = int32(1)]; tensor dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13815424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16174784))))[name = string("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = string("y_7_cast_fp16")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([0, 2, 1])]; tensor var_1073_cast_fp16 = transpose(perm = var_1072, x = y_7_cast_fp16)[name = string("transpose_244")]; tensor input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1073_cast_fp16)[name = string("input_31_cast_fp16")]; tensor input_33_axes_0 = const()[name = string("input_33_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_self_weight_to_fp16 = const()[name = string("dec_layers_2_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16176384)))]; fp16 var_1077_to_fp16 = const()[name = string("op_1077_to_fp16"), val = fp16(0x1.5p-17)]; tensor