program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor audio_emb, tensor encoder_mask, tensor encoder_output, tensor position, state> sa_k_0, state> sa_k_1, state> sa_k_10, state> sa_k_11, state> sa_k_2, state> sa_k_3, state> sa_k_4, state> sa_k_5, state> sa_k_6, state> sa_k_7, state> sa_k_8, state> sa_k_9, state> sa_v_0, state> sa_v_1, state> sa_v_10, state> sa_v_11, state> sa_v_2, state> sa_v_3, state> sa_v_4, state> sa_v_5, state> sa_v_6, state> sa_v_7, state> sa_v_8, state> sa_v_9, tensor xa_k_0, tensor xa_k_1, tensor xa_k_10, tensor xa_k_11, tensor xa_k_2, tensor xa_k_3, tensor xa_k_4, tensor xa_k_5, tensor xa_k_6, tensor xa_k_7, tensor xa_k_8, tensor xa_k_9, tensor xa_v_0, tensor xa_v_1, tensor xa_v_10, tensor xa_v_11, tensor xa_v_2, tensor xa_v_3, tensor xa_v_4, tensor xa_v_5, tensor xa_v_6, tensor xa_v_7, tensor xa_v_8, tensor xa_v_9) { int32 var_502_batch_dims_0 = const()[name = string("op_502_batch_dims_0"), val = int32(0)]; bool var_502_validate_indices_0 = const()[name = string("op_502_validate_indices_0"), val = bool(false)]; tensor dec_position_embeddings_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1572992))))[name = string("dec_position_embeddings_weight_to_fp16_quantized")]; string position_to_int16_dtype_0 = const()[name = string("position_to_int16_dtype_0"), val = string("int16")]; string cast_111_dtype_0 = const()[name = string("cast_111_dtype_0"), val = string("int32")]; int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)]; tensor position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = string("cast_35")]; tensor cast_111 = cast(dtype = cast_111_dtype_0, x = position_to_int16)[name = string("cast_34")]; tensor greater_equal_0 = greater_equal(x = cast_111, y = greater_equal_0_y_0)[name = string("greater_equal_0")]; int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(2048)]; tensor add_0 = add(x = cast_111, y = slice_by_index_0)[name = string("add_0")]; tensor select_0 = select(a = cast_111, b = add_0, cond = greater_equal_0)[name = string("select_0")]; string select_0_to_int16_dtype_0 = const()[name = string("select_0_to_int16_dtype_0"), val = string("int16")]; string cast_0_dtype_0 = const()[name = string("cast_0_dtype_0"), val = string("int32")]; int32 greater_equal_0_y_0_1 = const()[name = string("greater_equal_0_y_0_1"), val = int32(0)]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = string("cast_33")]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = string("cast_32")]; tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = string("greater_equal_0_1")]; int32 slice_by_index_0_1 = const()[name = string("slice_by_index_0_1"), val = int32(2048)]; tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = string("add_0_1")]; tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = string("select_0_1")]; int32 op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = string("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = int32(0)]; tensor op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = string("op_502_cast_fp16_cast_uint16_cast_uint16")]; string audio_emb_to_fp16_dtype_0 = const()[name = string("audio_emb_to_fp16_dtype_0"), val = string("fp16")]; tensor audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = string("cast_31")]; tensor input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = string("input_3_cast_fp16")]; tensor idx_range_promoted_to_fp16 = const()[name = string("idx_range_promoted_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1577152)))]; string var_515_to_fp16_dtype_0 = const()[name = string("op_515_to_fp16_dtype_0"), val = string("fp16")]; tensor position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = string("cast_30")]; tensor var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_516_cast_fp16")]; tensor sa_key_mask_axes_0 = const()[name = string("sa_key_mask_axes_0"), val = tensor([0])]; string sa_key_mask_1_to_fp16_dtype_0 = const()[name = string("sa_key_mask_1_to_fp16_dtype_0"), val = string("fp16")]; tensor var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = string("cast_29")]; tensor sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = string("sa_key_mask_cast_fp16")]; tensor input_5_axes_0 = const()[name = string("input_5_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_self_weight_to_fp16 = const()[name = string("dec_layers_0_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1578432)))]; fp16 var_525_to_fp16 = const()[name = string("op_525_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = string("input_5_cast_fp16")]; tensor dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1580032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3349568))))[name = string("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3354240)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor var_539 = const()[name = string("op_539"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = string("qkv_3_cast_fp16")]; tensor q_1_begin_0 = const()[name = string("q_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_1_end_0 = const()[name = string("q_1_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_1_end_mask_0 = const()[name = string("q_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_1_squeeze_mask_0 = const()[name = string("q_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("q_1_cast_fp16")]; tensor new_k_1_begin_0 = const()[name = string("new_k_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_1_end_0 = const()[name = string("new_k_1_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_1_end_mask_0 = const()[name = string("new_k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_1_squeeze_mask_0 = const()[name = string("new_k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_k_1_cast_fp16")]; tensor new_v_1_begin_0 = const()[name = string("new_v_1_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_1_end_0 = const()[name = string("new_v_1_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_1_end_mask_0 = const()[name = string("new_v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_1_squeeze_mask_0 = const()[name = string("new_v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = string("new_v_1_cast_fp16")]; tensor var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = string("op_585_cast_fp16")]; string write_oh_1_dtype_0 = const()[name = string("write_oh_1_dtype_0"), val = string("fp16")]; tensor var_595 = const()[name = string("op_595"), val = tensor([1, 600, 1, 1])]; tensor write_oh_1 = cast(dtype = write_oh_1_dtype_0, x = var_585_cast_fp16)[name = string("cast_28")]; tensor write_oh_b_1 = reshape(shape = var_595, x = write_oh_1)[name = string("write_oh_b_1")]; tensor read_state_0 = read_state(input = sa_k_0)[name = string("read_state_0")]; tensor var_608_cast_fp16 = sub(x = new_k_1_cast_fp16, y = read_state_0)[name = string("op_608_cast_fp16")]; tensor var_609_cast_fp16 = mul(x = var_608_cast_fp16, y = write_oh_b_1)[name = string("op_609_cast_fp16")]; tensor sa_k_buf_3_cast_fp16 = add(x = read_state_0, y = var_609_cast_fp16)[name = string("sa_k_buf_3_cast_fp16")]; write_state(data = sa_k_buf_3_cast_fp16, input = sa_k_0)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = sa_k_0)[name = string("coreml_update_state_24")]; tensor read_state_1 = read_state(input = sa_v_0)[name = string("read_state_1")]; tensor var_613_cast_fp16 = sub(x = new_v_1_cast_fp16, y = read_state_1)[name = string("op_613_cast_fp16")]; tensor var_614_cast_fp16 = mul(x = var_613_cast_fp16, y = write_oh_b_1)[name = string("op_614_cast_fp16")]; tensor sa_v_buf_3_cast_fp16 = add(x = read_state_1, y = var_614_cast_fp16)[name = string("sa_v_buf_3_cast_fp16")]; write_state(data = sa_v_buf_3_cast_fp16, input = sa_v_0)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = sa_v_0)[name = string("coreml_update_state_25")]; tensor var_643 = const()[name = string("op_643"), val = tensor([0, 2, -3, -1])]; bool var_645_transpose_x_0 = const()[name = string("op_645_transpose_x_0"), val = bool(false)]; bool var_645_transpose_y_0 = const()[name = string("op_645_transpose_y_0"), val = bool(false)]; tensor transpose_96_perm_0 = const()[name = string("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_97_perm_0 = const()[name = string("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = coreml_update_state_24)[name = string("transpose_262")]; tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = string("transpose_263")]; tensor var_645_cast_fp16 = matmul(transpose_x = var_645_transpose_x_0, transpose_y = var_645_transpose_y_0, x = transpose_96, y = transpose_97)[name = string("op_645_cast_fp16")]; fp16 var_646_to_fp16 = const()[name = string("op_646_to_fp16"), val = fp16(0x1p-3)]; tensor scores_1_cast_fp16 = mul(x = var_645_cast_fp16, y = var_646_to_fp16)[name = string("scores_1_cast_fp16")]; tensor var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor([1])]; tensor var_654_cast_fp16 = expand_dims(axes = var_654_axes_0, x = sa_key_mask_cast_fp16)[name = string("op_654_cast_fp16")]; tensor var_656_axes_0 = const()[name = string("op_656_axes_0"), val = tensor([2])]; tensor var_656_cast_fp16 = expand_dims(axes = var_656_axes_0, x = var_654_cast_fp16)[name = string("op_656_cast_fp16")]; fp16 var_662_promoted_to_fp16 = const()[name = string("op_662_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_663_cast_fp16 = equal(x = var_656_cast_fp16, y = var_662_promoted_to_fp16)[name = string("op_663_cast_fp16")]; fp16 var_664_to_fp16 = const()[name = string("op_664_to_fp16"), val = fp16(-inf)]; tensor scores_3_cast_fp16 = select(a = var_664_to_fp16, b = scores_1_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_3_cast_fp16")]; int32 var_666 = const()[name = string("op_666"), val = int32(-1)]; tensor probs_1_cast_fp16 = softmax(axis = var_666, x = scores_3_cast_fp16)[name = string("probs_1_cast_fp16")]; bool var_669_transpose_x_0 = const()[name = string("op_669_transpose_x_0"), val = bool(false)]; bool var_669_transpose_y_0 = const()[name = string("op_669_transpose_y_0"), val = bool(false)]; tensor v_t_1_cast_fp16 = transpose(perm = var_643, x = coreml_update_state_25)[name = string("transpose_261")]; tensor var_669_cast_fp16 = matmul(transpose_x = var_669_transpose_x_0, transpose_y = var_669_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = string("op_669_cast_fp16")]; tensor var_674 = const()[name = string("op_674"), val = tensor([0, 2, 1, 3])]; tensor var_679 = const()[name = string("op_679"), val = tensor([1, 1, -1])]; tensor var_675_cast_fp16 = transpose(perm = var_674, x = var_669_cast_fp16)[name = string("transpose_260")]; tensor input_7_cast_fp16 = reshape(shape = var_679, x = var_675_cast_fp16)[name = string("input_7_cast_fp16")]; tensor dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3358912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3948800))))[name = string("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3950400)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = string("input_9_cast_fp16")]; tensor input_11_axes_0 = const()[name = string("input_11_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3952000)))]; fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_687_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = string("input_11_cast_fp16")]; tensor dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(3953600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4051968))))[name = string("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_2_bias_0_to_fp16 = const()[name = string("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052288)))]; tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor var_700 = const()[name = string("op_700"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_1_cast_fp16 = reshape(shape = var_700, x = linear_2_cast_fp16)[name = string("xq_proj_1_cast_fp16")]; tensor var_718 = const()[name = string("op_718"), val = tensor([0, 2, -3, -1])]; string xa_v_0_to_fp16_dtype_0 = const()[name = string("xa_v_0_to_fp16_dtype_0"), val = string("fp16")]; bool var_720_transpose_x_0 = const()[name = string("op_720_transpose_x_0"), val = bool(false)]; bool var_720_transpose_y_0 = const()[name = string("op_720_transpose_y_0"), val = bool(false)]; string xa_k_0_to_fp16_dtype_0 = const()[name = string("xa_k_0_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_98_perm_0 = const()[name = string("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_99_perm_0 = const()[name = string("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = string("cast_27")]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = string("transpose_258")]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = string("transpose_259")]; tensor var_720_cast_fp16 = matmul(transpose_x = var_720_transpose_x_0, transpose_y = var_720_transpose_y_0, x = transpose_98, y = transpose_99)[name = string("op_720_cast_fp16")]; fp16 var_721_to_fp16 = const()[name = string("op_721_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_1_cast_fp16 = mul(x = var_720_cast_fp16, y = var_721_to_fp16)[name = string("xscores_1_cast_fp16")]; tensor var_729_axes_0 = const()[name = string("op_729_axes_0"), val = tensor([1])]; string encoder_mask_to_fp16_dtype_0 = const()[name = string("encoder_mask_to_fp16_dtype_0"), val = string("fp16")]; tensor encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = string("cast_26")]; tensor var_729_cast_fp16 = expand_dims(axes = var_729_axes_0, x = encoder_mask_to_fp16)[name = string("op_729_cast_fp16")]; tensor var_731_axes_0 = const()[name = string("op_731_axes_0"), val = tensor([2])]; tensor var_731_cast_fp16 = expand_dims(axes = var_731_axes_0, x = var_729_cast_fp16)[name = string("op_731_cast_fp16")]; fp16 var_737_promoted_to_fp16 = const()[name = string("op_737_promoted_to_fp16"), val = fp16(0x0p+0)]; tensor var_738_cast_fp16 = equal(x = var_731_cast_fp16, y = var_737_promoted_to_fp16)[name = string("op_738_cast_fp16")]; fp16 var_739_to_fp16 = const()[name = string("op_739_to_fp16"), val = fp16(-inf)]; tensor xscores_3_cast_fp16 = select(a = var_739_to_fp16, b = xscores_1_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_3_cast_fp16")]; int32 var_741 = const()[name = string("op_741"), val = int32(-1)]; tensor xprobs_1_cast_fp16 = softmax(axis = var_741, x = xscores_3_cast_fp16)[name = string("xprobs_1_cast_fp16")]; bool var_744_transpose_x_0 = const()[name = string("op_744_transpose_x_0"), val = bool(false)]; bool var_744_transpose_y_0 = const()[name = string("op_744_transpose_y_0"), val = bool(false)]; tensor xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = string("cast_25")]; tensor xvT_1_cast_fp16 = transpose(perm = var_718, x = xa_v_0_to_fp16)[name = string("transpose_257")]; tensor var_744_cast_fp16 = matmul(transpose_x = var_744_transpose_x_0, transpose_y = var_744_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = string("op_744_cast_fp16")]; tensor var_749 = const()[name = string("op_749"), val = tensor([0, 2, 1, 3])]; tensor var_754 = const()[name = string("op_754"), val = tensor([1, 1, -1])]; tensor var_750_cast_fp16 = transpose(perm = var_749, x = var_744_cast_fp16)[name = string("transpose_256")]; tensor input_13_cast_fp16 = reshape(shape = var_754, x = var_750_cast_fp16)[name = string("input_13_cast_fp16")]; tensor dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4052608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4150976))))[name = string("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = string("input_15_cast_fp16")]; tensor x_1_axes_0 = const()[name = string("x_1_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4152576)))]; fp16 var_762_to_fp16 = const()[name = string("op_762_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_762_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = string("x_1_cast_fp16")]; tensor var_778 = const()[name = string("op_778"), val = tensor([0, 2, 1])]; string y_1_pad_type_0 = const()[name = string("y_1_pad_type_0"), val = string("valid")]; tensor y_1_strides_0 = const()[name = string("y_1_strides_0"), val = tensor([1])]; tensor y_1_pad_0 = const()[name = string("y_1_pad_0"), val = tensor([0, 0])]; tensor y_1_dilations_0 = const()[name = string("y_1_dilations_0"), val = tensor([1])]; int32 y_1_groups_0 = const()[name = string("y_1_groups_0"), val = int32(1)]; tensor dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4154176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6513536))))[name = string("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_3_cast_fp16 = transpose(perm = var_778, x = x_1_cast_fp16)[name = string("transpose_255")]; tensor y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = string("y_1_cast_fp16")]; string x_5_mode_0 = const()[name = string("x_5_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = string("x_5_cast_fp16")]; string y_3_pad_type_0 = const()[name = string("y_3_pad_type_0"), val = string("valid")]; tensor y_3_strides_0 = const()[name = string("y_3_strides_0"), val = tensor([1])]; tensor y_3_pad_0 = const()[name = string("y_3_pad_0"), val = tensor([0, 0])]; tensor y_3_dilations_0 = const()[name = string("y_3_dilations_0"), val = tensor([1])]; int32 y_3_groups_0 = const()[name = string("y_3_groups_0"), val = int32(1)]; tensor dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6519744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8879104))))[name = string("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = string("y_3_cast_fp16")]; tensor var_796 = const()[name = string("op_796"), val = tensor([0, 2, 1])]; tensor var_797_cast_fp16 = transpose(perm = var_796, x = y_3_cast_fp16)[name = string("transpose_254")]; tensor input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_797_cast_fp16)[name = string("input_17_cast_fp16")]; tensor input_19_axes_0 = const()[name = string("input_19_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_self_weight_to_fp16 = const()[name = string("dec_layers_1_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8880704)))]; fp16 var_801_to_fp16 = const()[name = string("op_801_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_801_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")]; tensor dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8882304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10651840))))[name = string("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_815 = const()[name = string("op_815"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_7_cast_fp16 = reshape(shape = var_815, x = linear_4_cast_fp16)[name = string("qkv_7_cast_fp16")]; tensor q_3_begin_0 = const()[name = string("q_3_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_3_end_0 = const()[name = string("q_3_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_3_end_mask_0 = const()[name = string("q_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_3_squeeze_mask_0 = const()[name = string("q_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("q_3_cast_fp16")]; tensor new_k_3_begin_0 = const()[name = string("new_k_3_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_3_end_0 = const()[name = string("new_k_3_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_3_end_mask_0 = const()[name = string("new_k_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_3_squeeze_mask_0 = const()[name = string("new_k_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_k_3_cast_fp16")]; tensor new_v_3_begin_0 = const()[name = string("new_v_3_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_3_end_0 = const()[name = string("new_v_3_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_3_end_mask_0 = const()[name = string("new_v_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_3_squeeze_mask_0 = const()[name = string("new_v_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = string("new_v_3_cast_fp16")]; tensor read_state_2 = read_state(input = sa_k_1)[name = string("read_state_2")]; tensor var_884_cast_fp16 = sub(x = new_k_3_cast_fp16, y = read_state_2)[name = string("op_884_cast_fp16")]; tensor var_885_cast_fp16 = mul(x = var_884_cast_fp16, y = write_oh_b_1)[name = string("op_885_cast_fp16")]; tensor sa_k_buf_7_cast_fp16 = add(x = read_state_2, y = var_885_cast_fp16)[name = string("sa_k_buf_7_cast_fp16")]; write_state(data = sa_k_buf_7_cast_fp16, input = sa_k_1)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = sa_k_1)[name = string("coreml_update_state_26")]; tensor read_state_3 = read_state(input = sa_v_1)[name = string("read_state_3")]; tensor var_889_cast_fp16 = sub(x = new_v_3_cast_fp16, y = read_state_3)[name = string("op_889_cast_fp16")]; tensor var_890_cast_fp16 = mul(x = var_889_cast_fp16, y = write_oh_b_1)[name = string("op_890_cast_fp16")]; tensor sa_v_buf_7_cast_fp16 = add(x = read_state_3, y = var_890_cast_fp16)[name = string("sa_v_buf_7_cast_fp16")]; write_state(data = sa_v_buf_7_cast_fp16, input = sa_v_1)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = sa_v_1)[name = string("coreml_update_state_27")]; tensor var_919 = const()[name = string("op_919"), val = tensor([0, 2, -3, -1])]; bool var_921_transpose_x_0 = const()[name = string("op_921_transpose_x_0"), val = bool(false)]; bool var_921_transpose_y_0 = const()[name = string("op_921_transpose_y_0"), val = bool(false)]; tensor transpose_100_perm_0 = const()[name = string("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_101_perm_0 = const()[name = string("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = coreml_update_state_26)[name = string("transpose_252")]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = string("transpose_253")]; tensor var_921_cast_fp16 = matmul(transpose_x = var_921_transpose_x_0, transpose_y = var_921_transpose_y_0, x = transpose_100, y = transpose_101)[name = string("op_921_cast_fp16")]; fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(0x1p-3)]; tensor scores_5_cast_fp16 = mul(x = var_921_cast_fp16, y = var_922_to_fp16)[name = string("scores_5_cast_fp16")]; fp16 var_940_to_fp16 = const()[name = string("op_940_to_fp16"), val = fp16(-inf)]; tensor scores_7_cast_fp16 = select(a = var_940_to_fp16, b = scores_5_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_7_cast_fp16")]; int32 var_942 = const()[name = string("op_942"), val = int32(-1)]; tensor probs_3_cast_fp16 = softmax(axis = var_942, x = scores_7_cast_fp16)[name = string("probs_3_cast_fp16")]; bool var_945_transpose_x_0 = const()[name = string("op_945_transpose_x_0"), val = bool(false)]; bool var_945_transpose_y_0 = const()[name = string("op_945_transpose_y_0"), val = bool(false)]; tensor v_t_3_cast_fp16 = transpose(perm = var_919, x = coreml_update_state_27)[name = string("transpose_251")]; tensor var_945_cast_fp16 = matmul(transpose_x = var_945_transpose_x_0, transpose_y = var_945_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = string("op_945_cast_fp16")]; tensor var_950 = const()[name = string("op_950"), val = tensor([0, 2, 1, 3])]; tensor var_955 = const()[name = string("op_955"), val = tensor([1, 1, -1])]; tensor var_951_cast_fp16 = transpose(perm = var_950, x = var_945_cast_fp16)[name = string("transpose_250")]; tensor input_21_cast_fp16 = reshape(shape = var_955, x = var_951_cast_fp16)[name = string("input_21_cast_fp16")]; tensor dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10656512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11246400))))[name = string("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = string("input_23_cast_fp16")]; tensor input_25_axes_0 = const()[name = string("input_25_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11248000)))]; fp16 var_963_to_fp16 = const()[name = string("op_963_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_963_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = string("input_25_cast_fp16")]; tensor dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11249600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11347968))))[name = string("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor var_976 = const()[name = string("op_976"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_3_cast_fp16 = reshape(shape = var_976, x = linear_6_cast_fp16)[name = string("xq_proj_3_cast_fp16")]; tensor var_994 = const()[name = string("op_994"), val = tensor([0, 2, -3, -1])]; string xa_v_1_to_fp16_dtype_0 = const()[name = string("xa_v_1_to_fp16_dtype_0"), val = string("fp16")]; bool var_996_transpose_x_0 = const()[name = string("op_996_transpose_x_0"), val = bool(false)]; bool var_996_transpose_y_0 = const()[name = string("op_996_transpose_y_0"), val = bool(false)]; string xa_k_1_to_fp16_dtype_0 = const()[name = string("xa_k_1_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_102_perm_0 = const()[name = string("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_103_perm_0 = const()[name = string("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = string("cast_24")]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = string("transpose_248")]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = string("transpose_249")]; tensor var_996_cast_fp16 = matmul(transpose_x = var_996_transpose_x_0, transpose_y = var_996_transpose_y_0, x = transpose_102, y = transpose_103)[name = string("op_996_cast_fp16")]; fp16 var_997_to_fp16 = const()[name = string("op_997_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_5_cast_fp16 = mul(x = var_996_cast_fp16, y = var_997_to_fp16)[name = string("xscores_5_cast_fp16")]; fp16 var_1015_to_fp16 = const()[name = string("op_1015_to_fp16"), val = fp16(-inf)]; tensor xscores_7_cast_fp16 = select(a = var_1015_to_fp16, b = xscores_5_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_7_cast_fp16")]; int32 var_1017 = const()[name = string("op_1017"), val = int32(-1)]; tensor xprobs_3_cast_fp16 = softmax(axis = var_1017, x = xscores_7_cast_fp16)[name = string("xprobs_3_cast_fp16")]; bool var_1020_transpose_x_0 = const()[name = string("op_1020_transpose_x_0"), val = bool(false)]; bool var_1020_transpose_y_0 = const()[name = string("op_1020_transpose_y_0"), val = bool(false)]; tensor xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = string("cast_23")]; tensor xvT_3_cast_fp16 = transpose(perm = var_994, x = xa_v_1_to_fp16)[name = string("transpose_247")]; tensor var_1020_cast_fp16 = matmul(transpose_x = var_1020_transpose_x_0, transpose_y = var_1020_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = string("op_1020_cast_fp16")]; tensor var_1025 = const()[name = string("op_1025"), val = tensor([0, 2, 1, 3])]; tensor var_1030 = const()[name = string("op_1030"), val = tensor([1, 1, -1])]; tensor var_1026_cast_fp16 = transpose(perm = var_1025, x = var_1020_cast_fp16)[name = string("transpose_246")]; tensor input_27_cast_fp16 = reshape(shape = var_1030, x = var_1026_cast_fp16)[name = string("input_27_cast_fp16")]; tensor dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11348288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11446656))))[name = string("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = string("input_29_cast_fp16")]; tensor x_9_axes_0 = const()[name = string("x_9_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11448256)))]; fp16 var_1038_to_fp16 = const()[name = string("op_1038_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1038_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = string("x_9_cast_fp16")]; tensor var_1054 = const()[name = string("op_1054"), val = tensor([0, 2, 1])]; string y_5_pad_type_0 = const()[name = string("y_5_pad_type_0"), val = string("valid")]; tensor y_5_strides_0 = const()[name = string("y_5_strides_0"), val = tensor([1])]; tensor y_5_pad_0 = const()[name = string("y_5_pad_0"), val = tensor([0, 0])]; tensor y_5_dilations_0 = const()[name = string("y_5_dilations_0"), val = tensor([1])]; int32 y_5_groups_0 = const()[name = string("y_5_groups_0"), val = int32(1)]; tensor dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11449856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13809216))))[name = string("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_11_cast_fp16 = transpose(perm = var_1054, x = x_9_cast_fp16)[name = string("transpose_245")]; tensor y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = string("y_5_cast_fp16")]; string x_13_mode_0 = const()[name = string("x_13_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = string("x_13_cast_fp16")]; string y_7_pad_type_0 = const()[name = string("y_7_pad_type_0"), val = string("valid")]; tensor y_7_strides_0 = const()[name = string("y_7_strides_0"), val = tensor([1])]; tensor y_7_pad_0 = const()[name = string("y_7_pad_0"), val = tensor([0, 0])]; tensor y_7_dilations_0 = const()[name = string("y_7_dilations_0"), val = tensor([1])]; int32 y_7_groups_0 = const()[name = string("y_7_groups_0"), val = int32(1)]; tensor dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13815424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16174784))))[name = string("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = string("y_7_cast_fp16")]; tensor var_1072 = const()[name = string("op_1072"), val = tensor([0, 2, 1])]; tensor var_1073_cast_fp16 = transpose(perm = var_1072, x = y_7_cast_fp16)[name = string("transpose_244")]; tensor input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1073_cast_fp16)[name = string("input_31_cast_fp16")]; tensor input_33_axes_0 = const()[name = string("input_33_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_self_weight_to_fp16 = const()[name = string("dec_layers_2_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16176384)))]; fp16 var_1077_to_fp16 = const()[name = string("op_1077_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_1077_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = string("input_33_cast_fp16")]; tensor dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16177984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17947520))))[name = string("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor var_1091 = const()[name = string("op_1091"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_11_cast_fp16 = reshape(shape = var_1091, x = linear_8_cast_fp16)[name = string("qkv_11_cast_fp16")]; tensor q_5_begin_0 = const()[name = string("q_5_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_5_end_0 = const()[name = string("q_5_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_5_end_mask_0 = const()[name = string("q_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_5_squeeze_mask_0 = const()[name = string("q_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("q_5_cast_fp16")]; tensor new_k_5_begin_0 = const()[name = string("new_k_5_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_5_end_0 = const()[name = string("new_k_5_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_5_end_mask_0 = const()[name = string("new_k_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_5_squeeze_mask_0 = const()[name = string("new_k_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("new_k_5_cast_fp16")]; tensor new_v_5_begin_0 = const()[name = string("new_v_5_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_5_end_0 = const()[name = string("new_v_5_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_5_end_mask_0 = const()[name = string("new_v_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_5_squeeze_mask_0 = const()[name = string("new_v_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = string("new_v_5_cast_fp16")]; tensor read_state_4 = read_state(input = sa_k_2)[name = string("read_state_4")]; tensor var_1160_cast_fp16 = sub(x = new_k_5_cast_fp16, y = read_state_4)[name = string("op_1160_cast_fp16")]; tensor var_1161_cast_fp16 = mul(x = var_1160_cast_fp16, y = write_oh_b_1)[name = string("op_1161_cast_fp16")]; tensor sa_k_buf_11_cast_fp16 = add(x = read_state_4, y = var_1161_cast_fp16)[name = string("sa_k_buf_11_cast_fp16")]; write_state(data = sa_k_buf_11_cast_fp16, input = sa_k_2)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_28 = read_state(input = sa_k_2)[name = string("coreml_update_state_28")]; tensor read_state_5 = read_state(input = sa_v_2)[name = string("read_state_5")]; tensor var_1165_cast_fp16 = sub(x = new_v_5_cast_fp16, y = read_state_5)[name = string("op_1165_cast_fp16")]; tensor var_1166_cast_fp16 = mul(x = var_1165_cast_fp16, y = write_oh_b_1)[name = string("op_1166_cast_fp16")]; tensor sa_v_buf_11_cast_fp16 = add(x = read_state_5, y = var_1166_cast_fp16)[name = string("sa_v_buf_11_cast_fp16")]; write_state(data = sa_v_buf_11_cast_fp16, input = sa_v_2)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_29 = read_state(input = sa_v_2)[name = string("coreml_update_state_29")]; tensor var_1195 = const()[name = string("op_1195"), val = tensor([0, 2, -3, -1])]; bool var_1197_transpose_x_0 = const()[name = string("op_1197_transpose_x_0"), val = bool(false)]; bool var_1197_transpose_y_0 = const()[name = string("op_1197_transpose_y_0"), val = bool(false)]; tensor transpose_104_perm_0 = const()[name = string("transpose_104_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_105_perm_0 = const()[name = string("transpose_105_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = coreml_update_state_28)[name = string("transpose_242")]; tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = q_5_cast_fp16)[name = string("transpose_243")]; tensor var_1197_cast_fp16 = matmul(transpose_x = var_1197_transpose_x_0, transpose_y = var_1197_transpose_y_0, x = transpose_104, y = transpose_105)[name = string("op_1197_cast_fp16")]; fp16 var_1198_to_fp16 = const()[name = string("op_1198_to_fp16"), val = fp16(0x1p-3)]; tensor scores_9_cast_fp16 = mul(x = var_1197_cast_fp16, y = var_1198_to_fp16)[name = string("scores_9_cast_fp16")]; fp16 var_1216_to_fp16 = const()[name = string("op_1216_to_fp16"), val = fp16(-inf)]; tensor scores_11_cast_fp16 = select(a = var_1216_to_fp16, b = scores_9_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_11_cast_fp16")]; int32 var_1218 = const()[name = string("op_1218"), val = int32(-1)]; tensor probs_5_cast_fp16 = softmax(axis = var_1218, x = scores_11_cast_fp16)[name = string("probs_5_cast_fp16")]; bool var_1221_transpose_x_0 = const()[name = string("op_1221_transpose_x_0"), val = bool(false)]; bool var_1221_transpose_y_0 = const()[name = string("op_1221_transpose_y_0"), val = bool(false)]; tensor v_t_5_cast_fp16 = transpose(perm = var_1195, x = coreml_update_state_29)[name = string("transpose_241")]; tensor var_1221_cast_fp16 = matmul(transpose_x = var_1221_transpose_x_0, transpose_y = var_1221_transpose_y_0, x = probs_5_cast_fp16, y = v_t_5_cast_fp16)[name = string("op_1221_cast_fp16")]; tensor var_1226 = const()[name = string("op_1226"), val = tensor([0, 2, 1, 3])]; tensor var_1231 = const()[name = string("op_1231"), val = tensor([1, 1, -1])]; tensor var_1227_cast_fp16 = transpose(perm = var_1226, x = var_1221_cast_fp16)[name = string("transpose_240")]; tensor input_35_cast_fp16 = reshape(shape = var_1231, x = var_1227_cast_fp16)[name = string("input_35_cast_fp16")]; tensor dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17952192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18542080))))[name = string("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor input_37_cast_fp16 = add(x = input_31_cast_fp16, y = linear_9_cast_fp16)[name = string("input_37_cast_fp16")]; tensor input_39_axes_0 = const()[name = string("input_39_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18543680)))]; fp16 var_1239_to_fp16 = const()[name = string("op_1239_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_39_cast_fp16 = layer_norm(axes = input_39_axes_0, epsilon = var_1239_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")]; tensor dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18545280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18643648))))[name = string("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_10_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor var_1252 = const()[name = string("op_1252"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_5_cast_fp16 = reshape(shape = var_1252, x = linear_10_cast_fp16)[name = string("xq_proj_5_cast_fp16")]; tensor var_1270 = const()[name = string("op_1270"), val = tensor([0, 2, -3, -1])]; string xa_v_2_to_fp16_dtype_0 = const()[name = string("xa_v_2_to_fp16_dtype_0"), val = string("fp16")]; bool var_1272_transpose_x_0 = const()[name = string("op_1272_transpose_x_0"), val = bool(false)]; bool var_1272_transpose_y_0 = const()[name = string("op_1272_transpose_y_0"), val = bool(false)]; string xa_k_2_to_fp16_dtype_0 = const()[name = string("xa_k_2_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_106_perm_0 = const()[name = string("transpose_106_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_107_perm_0 = const()[name = string("transpose_107_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_2_to_fp16 = cast(dtype = xa_k_2_to_fp16_dtype_0, x = xa_k_2)[name = string("cast_22")]; tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = xa_k_2_to_fp16)[name = string("transpose_238")]; tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = xq_proj_5_cast_fp16)[name = string("transpose_239")]; tensor var_1272_cast_fp16 = matmul(transpose_x = var_1272_transpose_x_0, transpose_y = var_1272_transpose_y_0, x = transpose_106, y = transpose_107)[name = string("op_1272_cast_fp16")]; fp16 var_1273_to_fp16 = const()[name = string("op_1273_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_9_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = string("xscores_9_cast_fp16")]; fp16 var_1291_to_fp16 = const()[name = string("op_1291_to_fp16"), val = fp16(-inf)]; tensor xscores_11_cast_fp16 = select(a = var_1291_to_fp16, b = xscores_9_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_11_cast_fp16")]; int32 var_1293 = const()[name = string("op_1293"), val = int32(-1)]; tensor xprobs_5_cast_fp16 = softmax(axis = var_1293, x = xscores_11_cast_fp16)[name = string("xprobs_5_cast_fp16")]; bool var_1296_transpose_x_0 = const()[name = string("op_1296_transpose_x_0"), val = bool(false)]; bool var_1296_transpose_y_0 = const()[name = string("op_1296_transpose_y_0"), val = bool(false)]; tensor xa_v_2_to_fp16 = cast(dtype = xa_v_2_to_fp16_dtype_0, x = xa_v_2)[name = string("cast_21")]; tensor xvT_5_cast_fp16 = transpose(perm = var_1270, x = xa_v_2_to_fp16)[name = string("transpose_237")]; tensor var_1296_cast_fp16 = matmul(transpose_x = var_1296_transpose_x_0, transpose_y = var_1296_transpose_y_0, x = xprobs_5_cast_fp16, y = xvT_5_cast_fp16)[name = string("op_1296_cast_fp16")]; tensor var_1301 = const()[name = string("op_1301"), val = tensor([0, 2, 1, 3])]; tensor var_1306 = const()[name = string("op_1306"), val = tensor([1, 1, -1])]; tensor var_1302_cast_fp16 = transpose(perm = var_1301, x = var_1296_cast_fp16)[name = string("transpose_236")]; tensor input_41_cast_fp16 = reshape(shape = var_1306, x = var_1302_cast_fp16)[name = string("input_41_cast_fp16")]; tensor dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18643968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18742336))))[name = string("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor input_43_cast_fp16 = add(x = input_37_cast_fp16, y = linear_11_cast_fp16)[name = string("input_43_cast_fp16")]; tensor x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18743936)))]; fp16 var_1314_to_fp16 = const()[name = string("op_1314_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_1314_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_43_cast_fp16)[name = string("x_17_cast_fp16")]; tensor var_1330 = const()[name = string("op_1330"), val = tensor([0, 2, 1])]; string y_9_pad_type_0 = const()[name = string("y_9_pad_type_0"), val = string("valid")]; tensor y_9_strides_0 = const()[name = string("y_9_strides_0"), val = tensor([1])]; tensor y_9_pad_0 = const()[name = string("y_9_pad_0"), val = tensor([0, 0])]; tensor y_9_dilations_0 = const()[name = string("y_9_dilations_0"), val = tensor([1])]; int32 y_9_groups_0 = const()[name = string("y_9_groups_0"), val = int32(1)]; tensor dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18745536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21104896))))[name = string("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_19_cast_fp16 = transpose(perm = var_1330, x = x_17_cast_fp16)[name = string("transpose_235")]; tensor y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_19_cast_fp16)[name = string("y_9_cast_fp16")]; string x_21_mode_0 = const()[name = string("x_21_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_21_cast_fp16 = gelu(mode = x_21_mode_0, x = y_9_cast_fp16)[name = string("x_21_cast_fp16")]; string y_11_pad_type_0 = const()[name = string("y_11_pad_type_0"), val = string("valid")]; tensor y_11_strides_0 = const()[name = string("y_11_strides_0"), val = tensor([1])]; tensor y_11_pad_0 = const()[name = string("y_11_pad_0"), val = tensor([0, 0])]; tensor y_11_dilations_0 = const()[name = string("y_11_dilations_0"), val = tensor([1])]; int32 y_11_groups_0 = const()[name = string("y_11_groups_0"), val = int32(1)]; tensor dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21111104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23470464))))[name = string("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = string("y_11_cast_fp16")]; tensor var_1348 = const()[name = string("op_1348"), val = tensor([0, 2, 1])]; tensor var_1349_cast_fp16 = transpose(perm = var_1348, x = y_11_cast_fp16)[name = string("transpose_234")]; tensor input_45_cast_fp16 = add(x = input_43_cast_fp16, y = var_1349_cast_fp16)[name = string("input_45_cast_fp16")]; tensor input_47_axes_0 = const()[name = string("input_47_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_self_weight_to_fp16 = const()[name = string("dec_layers_3_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23472064)))]; fp16 var_1353_to_fp16 = const()[name = string("op_1353_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, epsilon = var_1353_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")]; tensor dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23473664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25243200))))[name = string("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor var_1367 = const()[name = string("op_1367"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_15_cast_fp16 = reshape(shape = var_1367, x = linear_12_cast_fp16)[name = string("qkv_15_cast_fp16")]; tensor q_7_begin_0 = const()[name = string("q_7_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_7_end_0 = const()[name = string("q_7_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_7_end_mask_0 = const()[name = string("q_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_7_squeeze_mask_0 = const()[name = string("q_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_7_cast_fp16 = slice_by_index(begin = q_7_begin_0, end = q_7_end_0, end_mask = q_7_end_mask_0, squeeze_mask = q_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("q_7_cast_fp16")]; tensor new_k_7_begin_0 = const()[name = string("new_k_7_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_7_end_0 = const()[name = string("new_k_7_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_7_end_mask_0 = const()[name = string("new_k_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_7_squeeze_mask_0 = const()[name = string("new_k_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("new_k_7_cast_fp16")]; tensor new_v_7_begin_0 = const()[name = string("new_v_7_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_7_end_0 = const()[name = string("new_v_7_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_7_end_mask_0 = const()[name = string("new_v_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_7_squeeze_mask_0 = const()[name = string("new_v_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = string("new_v_7_cast_fp16")]; tensor read_state_6 = read_state(input = sa_k_3)[name = string("read_state_6")]; tensor var_1436_cast_fp16 = sub(x = new_k_7_cast_fp16, y = read_state_6)[name = string("op_1436_cast_fp16")]; tensor var_1437_cast_fp16 = mul(x = var_1436_cast_fp16, y = write_oh_b_1)[name = string("op_1437_cast_fp16")]; tensor sa_k_buf_15_cast_fp16 = add(x = read_state_6, y = var_1437_cast_fp16)[name = string("sa_k_buf_15_cast_fp16")]; write_state(data = sa_k_buf_15_cast_fp16, input = sa_k_3)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = sa_k_3)[name = string("coreml_update_state_30")]; tensor read_state_7 = read_state(input = sa_v_3)[name = string("read_state_7")]; tensor var_1441_cast_fp16 = sub(x = new_v_7_cast_fp16, y = read_state_7)[name = string("op_1441_cast_fp16")]; tensor var_1442_cast_fp16 = mul(x = var_1441_cast_fp16, y = write_oh_b_1)[name = string("op_1442_cast_fp16")]; tensor sa_v_buf_15_cast_fp16 = add(x = read_state_7, y = var_1442_cast_fp16)[name = string("sa_v_buf_15_cast_fp16")]; write_state(data = sa_v_buf_15_cast_fp16, input = sa_v_3)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = sa_v_3)[name = string("coreml_update_state_31")]; tensor var_1471 = const()[name = string("op_1471"), val = tensor([0, 2, -3, -1])]; bool var_1473_transpose_x_0 = const()[name = string("op_1473_transpose_x_0"), val = bool(false)]; bool var_1473_transpose_y_0 = const()[name = string("op_1473_transpose_y_0"), val = bool(false)]; tensor transpose_108_perm_0 = const()[name = string("transpose_108_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_109_perm_0 = const()[name = string("transpose_109_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = coreml_update_state_30)[name = string("transpose_232")]; tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = q_7_cast_fp16)[name = string("transpose_233")]; tensor var_1473_cast_fp16 = matmul(transpose_x = var_1473_transpose_x_0, transpose_y = var_1473_transpose_y_0, x = transpose_108, y = transpose_109)[name = string("op_1473_cast_fp16")]; fp16 var_1474_to_fp16 = const()[name = string("op_1474_to_fp16"), val = fp16(0x1p-3)]; tensor scores_13_cast_fp16 = mul(x = var_1473_cast_fp16, y = var_1474_to_fp16)[name = string("scores_13_cast_fp16")]; fp16 var_1492_to_fp16 = const()[name = string("op_1492_to_fp16"), val = fp16(-inf)]; tensor scores_15_cast_fp16 = select(a = var_1492_to_fp16, b = scores_13_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_15_cast_fp16")]; int32 var_1494 = const()[name = string("op_1494"), val = int32(-1)]; tensor probs_7_cast_fp16 = softmax(axis = var_1494, x = scores_15_cast_fp16)[name = string("probs_7_cast_fp16")]; bool var_1497_transpose_x_0 = const()[name = string("op_1497_transpose_x_0"), val = bool(false)]; bool var_1497_transpose_y_0 = const()[name = string("op_1497_transpose_y_0"), val = bool(false)]; tensor v_t_7_cast_fp16 = transpose(perm = var_1471, x = coreml_update_state_31)[name = string("transpose_231")]; tensor var_1497_cast_fp16 = matmul(transpose_x = var_1497_transpose_x_0, transpose_y = var_1497_transpose_y_0, x = probs_7_cast_fp16, y = v_t_7_cast_fp16)[name = string("op_1497_cast_fp16")]; tensor var_1502 = const()[name = string("op_1502"), val = tensor([0, 2, 1, 3])]; tensor var_1507 = const()[name = string("op_1507"), val = tensor([1, 1, -1])]; tensor var_1503_cast_fp16 = transpose(perm = var_1502, x = var_1497_cast_fp16)[name = string("transpose_230")]; tensor input_49_cast_fp16 = reshape(shape = var_1507, x = var_1503_cast_fp16)[name = string("input_49_cast_fp16")]; tensor dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25247872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25837760))))[name = string("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor input_51_cast_fp16 = add(x = input_45_cast_fp16, y = linear_13_cast_fp16)[name = string("input_51_cast_fp16")]; tensor input_53_axes_0 = const()[name = string("input_53_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25839360)))]; fp16 var_1515_to_fp16 = const()[name = string("op_1515_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, epsilon = var_1515_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_51_cast_fp16)[name = string("input_53_cast_fp16")]; tensor dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25840960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25939328))))[name = string("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor var_1528 = const()[name = string("op_1528"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_7_cast_fp16 = reshape(shape = var_1528, x = linear_14_cast_fp16)[name = string("xq_proj_7_cast_fp16")]; tensor var_1546 = const()[name = string("op_1546"), val = tensor([0, 2, -3, -1])]; string xa_v_3_to_fp16_dtype_0 = const()[name = string("xa_v_3_to_fp16_dtype_0"), val = string("fp16")]; bool var_1548_transpose_x_0 = const()[name = string("op_1548_transpose_x_0"), val = bool(false)]; bool var_1548_transpose_y_0 = const()[name = string("op_1548_transpose_y_0"), val = bool(false)]; string xa_k_3_to_fp16_dtype_0 = const()[name = string("xa_k_3_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_110_perm_0 = const()[name = string("transpose_110_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_111_perm_0 = const()[name = string("transpose_111_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_3_to_fp16 = cast(dtype = xa_k_3_to_fp16_dtype_0, x = xa_k_3)[name = string("cast_20")]; tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = xa_k_3_to_fp16)[name = string("transpose_228")]; tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = xq_proj_7_cast_fp16)[name = string("transpose_229")]; tensor var_1548_cast_fp16 = matmul(transpose_x = var_1548_transpose_x_0, transpose_y = var_1548_transpose_y_0, x = transpose_110, y = transpose_111)[name = string("op_1548_cast_fp16")]; fp16 var_1549_to_fp16 = const()[name = string("op_1549_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_13_cast_fp16 = mul(x = var_1548_cast_fp16, y = var_1549_to_fp16)[name = string("xscores_13_cast_fp16")]; fp16 var_1567_to_fp16 = const()[name = string("op_1567_to_fp16"), val = fp16(-inf)]; tensor xscores_15_cast_fp16 = select(a = var_1567_to_fp16, b = xscores_13_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_15_cast_fp16")]; int32 var_1569 = const()[name = string("op_1569"), val = int32(-1)]; tensor xprobs_7_cast_fp16 = softmax(axis = var_1569, x = xscores_15_cast_fp16)[name = string("xprobs_7_cast_fp16")]; bool var_1572_transpose_x_0 = const()[name = string("op_1572_transpose_x_0"), val = bool(false)]; bool var_1572_transpose_y_0 = const()[name = string("op_1572_transpose_y_0"), val = bool(false)]; tensor xa_v_3_to_fp16 = cast(dtype = xa_v_3_to_fp16_dtype_0, x = xa_v_3)[name = string("cast_19")]; tensor xvT_7_cast_fp16 = transpose(perm = var_1546, x = xa_v_3_to_fp16)[name = string("transpose_227")]; tensor var_1572_cast_fp16 = matmul(transpose_x = var_1572_transpose_x_0, transpose_y = var_1572_transpose_y_0, x = xprobs_7_cast_fp16, y = xvT_7_cast_fp16)[name = string("op_1572_cast_fp16")]; tensor var_1577 = const()[name = string("op_1577"), val = tensor([0, 2, 1, 3])]; tensor var_1582 = const()[name = string("op_1582"), val = tensor([1, 1, -1])]; tensor var_1578_cast_fp16 = transpose(perm = var_1577, x = var_1572_cast_fp16)[name = string("transpose_226")]; tensor input_55_cast_fp16 = reshape(shape = var_1582, x = var_1578_cast_fp16)[name = string("input_55_cast_fp16")]; tensor dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25939648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26038016))))[name = string("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor input_57_cast_fp16 = add(x = input_51_cast_fp16, y = linear_15_cast_fp16)[name = string("input_57_cast_fp16")]; tensor x_25_axes_0 = const()[name = string("x_25_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26039616)))]; fp16 var_1590_to_fp16 = const()[name = string("op_1590_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_1590_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_57_cast_fp16)[name = string("x_25_cast_fp16")]; tensor var_1606 = const()[name = string("op_1606"), val = tensor([0, 2, 1])]; string y_13_pad_type_0 = const()[name = string("y_13_pad_type_0"), val = string("valid")]; tensor y_13_strides_0 = const()[name = string("y_13_strides_0"), val = tensor([1])]; tensor y_13_pad_0 = const()[name = string("y_13_pad_0"), val = tensor([0, 0])]; tensor y_13_dilations_0 = const()[name = string("y_13_dilations_0"), val = tensor([1])]; int32 y_13_groups_0 = const()[name = string("y_13_groups_0"), val = int32(1)]; tensor dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26041216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28400576))))[name = string("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_27_cast_fp16 = transpose(perm = var_1606, x = x_25_cast_fp16)[name = string("transpose_225")]; tensor y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = string("y_13_cast_fp16")]; string x_29_mode_0 = const()[name = string("x_29_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_13_cast_fp16)[name = string("x_29_cast_fp16")]; string y_15_pad_type_0 = const()[name = string("y_15_pad_type_0"), val = string("valid")]; tensor y_15_strides_0 = const()[name = string("y_15_strides_0"), val = tensor([1])]; tensor y_15_pad_0 = const()[name = string("y_15_pad_0"), val = tensor([0, 0])]; tensor y_15_dilations_0 = const()[name = string("y_15_dilations_0"), val = tensor([1])]; int32 y_15_groups_0 = const()[name = string("y_15_groups_0"), val = int32(1)]; tensor dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28406784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30766144))))[name = string("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = string("y_15_cast_fp16")]; tensor var_1624 = const()[name = string("op_1624"), val = tensor([0, 2, 1])]; tensor var_1625_cast_fp16 = transpose(perm = var_1624, x = y_15_cast_fp16)[name = string("transpose_224")]; tensor input_59_cast_fp16 = add(x = input_57_cast_fp16, y = var_1625_cast_fp16)[name = string("input_59_cast_fp16")]; tensor input_61_axes_0 = const()[name = string("input_61_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_self_weight_to_fp16 = const()[name = string("dec_layers_4_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30767744)))]; fp16 var_1629_to_fp16 = const()[name = string("op_1629_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, epsilon = var_1629_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = input_59_cast_fp16)[name = string("input_61_cast_fp16")]; tensor dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30769344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32538880))))[name = string("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor var_1643 = const()[name = string("op_1643"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_19_cast_fp16 = reshape(shape = var_1643, x = linear_16_cast_fp16)[name = string("qkv_19_cast_fp16")]; tensor q_9_begin_0 = const()[name = string("q_9_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_9_end_0 = const()[name = string("q_9_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_9_end_mask_0 = const()[name = string("q_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_9_squeeze_mask_0 = const()[name = string("q_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("q_9_cast_fp16")]; tensor new_k_9_begin_0 = const()[name = string("new_k_9_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_9_end_0 = const()[name = string("new_k_9_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_9_end_mask_0 = const()[name = string("new_k_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_9_squeeze_mask_0 = const()[name = string("new_k_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("new_k_9_cast_fp16")]; tensor new_v_9_begin_0 = const()[name = string("new_v_9_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_9_end_0 = const()[name = string("new_v_9_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_9_end_mask_0 = const()[name = string("new_v_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_9_squeeze_mask_0 = const()[name = string("new_v_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = string("new_v_9_cast_fp16")]; tensor read_state_8 = read_state(input = sa_k_4)[name = string("read_state_8")]; tensor var_1712_cast_fp16 = sub(x = new_k_9_cast_fp16, y = read_state_8)[name = string("op_1712_cast_fp16")]; tensor var_1713_cast_fp16 = mul(x = var_1712_cast_fp16, y = write_oh_b_1)[name = string("op_1713_cast_fp16")]; tensor sa_k_buf_19_cast_fp16 = add(x = read_state_8, y = var_1713_cast_fp16)[name = string("sa_k_buf_19_cast_fp16")]; write_state(data = sa_k_buf_19_cast_fp16, input = sa_k_4)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = sa_k_4)[name = string("coreml_update_state_32")]; tensor read_state_9 = read_state(input = sa_v_4)[name = string("read_state_9")]; tensor var_1717_cast_fp16 = sub(x = new_v_9_cast_fp16, y = read_state_9)[name = string("op_1717_cast_fp16")]; tensor var_1718_cast_fp16 = mul(x = var_1717_cast_fp16, y = write_oh_b_1)[name = string("op_1718_cast_fp16")]; tensor sa_v_buf_19_cast_fp16 = add(x = read_state_9, y = var_1718_cast_fp16)[name = string("sa_v_buf_19_cast_fp16")]; write_state(data = sa_v_buf_19_cast_fp16, input = sa_v_4)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = sa_v_4)[name = string("coreml_update_state_33")]; tensor var_1747 = const()[name = string("op_1747"), val = tensor([0, 2, -3, -1])]; bool var_1749_transpose_x_0 = const()[name = string("op_1749_transpose_x_0"), val = bool(false)]; bool var_1749_transpose_y_0 = const()[name = string("op_1749_transpose_y_0"), val = bool(false)]; tensor transpose_112_perm_0 = const()[name = string("transpose_112_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_113_perm_0 = const()[name = string("transpose_113_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = coreml_update_state_32)[name = string("transpose_222")]; tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = q_9_cast_fp16)[name = string("transpose_223")]; tensor var_1749_cast_fp16 = matmul(transpose_x = var_1749_transpose_x_0, transpose_y = var_1749_transpose_y_0, x = transpose_112, y = transpose_113)[name = string("op_1749_cast_fp16")]; fp16 var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = fp16(0x1p-3)]; tensor scores_17_cast_fp16 = mul(x = var_1749_cast_fp16, y = var_1750_to_fp16)[name = string("scores_17_cast_fp16")]; fp16 var_1768_to_fp16 = const()[name = string("op_1768_to_fp16"), val = fp16(-inf)]; tensor scores_19_cast_fp16 = select(a = var_1768_to_fp16, b = scores_17_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_19_cast_fp16")]; int32 var_1770 = const()[name = string("op_1770"), val = int32(-1)]; tensor probs_9_cast_fp16 = softmax(axis = var_1770, x = scores_19_cast_fp16)[name = string("probs_9_cast_fp16")]; bool var_1773_transpose_x_0 = const()[name = string("op_1773_transpose_x_0"), val = bool(false)]; bool var_1773_transpose_y_0 = const()[name = string("op_1773_transpose_y_0"), val = bool(false)]; tensor v_t_9_cast_fp16 = transpose(perm = var_1747, x = coreml_update_state_33)[name = string("transpose_221")]; tensor var_1773_cast_fp16 = matmul(transpose_x = var_1773_transpose_x_0, transpose_y = var_1773_transpose_y_0, x = probs_9_cast_fp16, y = v_t_9_cast_fp16)[name = string("op_1773_cast_fp16")]; tensor var_1778 = const()[name = string("op_1778"), val = tensor([0, 2, 1, 3])]; tensor var_1783 = const()[name = string("op_1783"), val = tensor([1, 1, -1])]; tensor var_1779_cast_fp16 = transpose(perm = var_1778, x = var_1773_cast_fp16)[name = string("transpose_220")]; tensor input_63_cast_fp16 = reshape(shape = var_1783, x = var_1779_cast_fp16)[name = string("input_63_cast_fp16")]; tensor dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32543552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33133440))))[name = string("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor input_65_cast_fp16 = add(x = input_59_cast_fp16, y = linear_17_cast_fp16)[name = string("input_65_cast_fp16")]; tensor input_67_axes_0 = const()[name = string("input_67_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33135040)))]; fp16 var_1791_to_fp16 = const()[name = string("op_1791_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, epsilon = var_1791_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_65_cast_fp16)[name = string("input_67_cast_fp16")]; tensor dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33136640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33235008))))[name = string("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_1804 = const()[name = string("op_1804"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_9_cast_fp16 = reshape(shape = var_1804, x = linear_18_cast_fp16)[name = string("xq_proj_9_cast_fp16")]; tensor var_1822 = const()[name = string("op_1822"), val = tensor([0, 2, -3, -1])]; string xa_v_4_to_fp16_dtype_0 = const()[name = string("xa_v_4_to_fp16_dtype_0"), val = string("fp16")]; bool var_1824_transpose_x_0 = const()[name = string("op_1824_transpose_x_0"), val = bool(false)]; bool var_1824_transpose_y_0 = const()[name = string("op_1824_transpose_y_0"), val = bool(false)]; string xa_k_4_to_fp16_dtype_0 = const()[name = string("xa_k_4_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_114_perm_0 = const()[name = string("transpose_114_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_115_perm_0 = const()[name = string("transpose_115_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_4_to_fp16 = cast(dtype = xa_k_4_to_fp16_dtype_0, x = xa_k_4)[name = string("cast_18")]; tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = xa_k_4_to_fp16)[name = string("transpose_218")]; tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = xq_proj_9_cast_fp16)[name = string("transpose_219")]; tensor var_1824_cast_fp16 = matmul(transpose_x = var_1824_transpose_x_0, transpose_y = var_1824_transpose_y_0, x = transpose_114, y = transpose_115)[name = string("op_1824_cast_fp16")]; fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_17_cast_fp16 = mul(x = var_1824_cast_fp16, y = var_1825_to_fp16)[name = string("xscores_17_cast_fp16")]; fp16 var_1843_to_fp16 = const()[name = string("op_1843_to_fp16"), val = fp16(-inf)]; tensor xscores_19_cast_fp16 = select(a = var_1843_to_fp16, b = xscores_17_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_19_cast_fp16")]; int32 var_1845 = const()[name = string("op_1845"), val = int32(-1)]; tensor xprobs_9_cast_fp16 = softmax(axis = var_1845, x = xscores_19_cast_fp16)[name = string("xprobs_9_cast_fp16")]; bool var_1848_transpose_x_0 = const()[name = string("op_1848_transpose_x_0"), val = bool(false)]; bool var_1848_transpose_y_0 = const()[name = string("op_1848_transpose_y_0"), val = bool(false)]; tensor xa_v_4_to_fp16 = cast(dtype = xa_v_4_to_fp16_dtype_0, x = xa_v_4)[name = string("cast_17")]; tensor xvT_9_cast_fp16 = transpose(perm = var_1822, x = xa_v_4_to_fp16)[name = string("transpose_217")]; tensor var_1848_cast_fp16 = matmul(transpose_x = var_1848_transpose_x_0, transpose_y = var_1848_transpose_y_0, x = xprobs_9_cast_fp16, y = xvT_9_cast_fp16)[name = string("op_1848_cast_fp16")]; tensor var_1853 = const()[name = string("op_1853"), val = tensor([0, 2, 1, 3])]; tensor var_1858 = const()[name = string("op_1858"), val = tensor([1, 1, -1])]; tensor var_1854_cast_fp16 = transpose(perm = var_1853, x = var_1848_cast_fp16)[name = string("transpose_216")]; tensor input_69_cast_fp16 = reshape(shape = var_1858, x = var_1854_cast_fp16)[name = string("input_69_cast_fp16")]; tensor dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33235328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33333696))))[name = string("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_71_cast_fp16 = add(x = input_65_cast_fp16, y = linear_19_cast_fp16)[name = string("input_71_cast_fp16")]; tensor x_33_axes_0 = const()[name = string("x_33_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33335296)))]; fp16 var_1866_to_fp16 = const()[name = string("op_1866_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_1866_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_71_cast_fp16)[name = string("x_33_cast_fp16")]; tensor var_1882 = const()[name = string("op_1882"), val = tensor([0, 2, 1])]; string y_17_pad_type_0 = const()[name = string("y_17_pad_type_0"), val = string("valid")]; tensor y_17_strides_0 = const()[name = string("y_17_strides_0"), val = tensor([1])]; tensor y_17_pad_0 = const()[name = string("y_17_pad_0"), val = tensor([0, 0])]; tensor y_17_dilations_0 = const()[name = string("y_17_dilations_0"), val = tensor([1])]; int32 y_17_groups_0 = const()[name = string("y_17_groups_0"), val = int32(1)]; tensor dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33336896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35696256))))[name = string("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_35_cast_fp16 = transpose(perm = var_1882, x = x_33_cast_fp16)[name = string("transpose_215")]; tensor y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_35_cast_fp16)[name = string("y_17_cast_fp16")]; string x_37_mode_0 = const()[name = string("x_37_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = y_17_cast_fp16)[name = string("x_37_cast_fp16")]; string y_19_pad_type_0 = const()[name = string("y_19_pad_type_0"), val = string("valid")]; tensor y_19_strides_0 = const()[name = string("y_19_strides_0"), val = tensor([1])]; tensor y_19_pad_0 = const()[name = string("y_19_pad_0"), val = tensor([0, 0])]; tensor y_19_dilations_0 = const()[name = string("y_19_dilations_0"), val = tensor([1])]; int32 y_19_groups_0 = const()[name = string("y_19_groups_0"), val = int32(1)]; tensor dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35702464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38061824))))[name = string("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_19_cast_fp16 = conv(dilations = y_19_dilations_0, groups = y_19_groups_0, pad = y_19_pad_0, pad_type = y_19_pad_type_0, strides = y_19_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = string("y_19_cast_fp16")]; tensor var_1900 = const()[name = string("op_1900"), val = tensor([0, 2, 1])]; tensor var_1901_cast_fp16 = transpose(perm = var_1900, x = y_19_cast_fp16)[name = string("transpose_214")]; tensor input_73_cast_fp16 = add(x = input_71_cast_fp16, y = var_1901_cast_fp16)[name = string("input_73_cast_fp16")]; tensor input_75_axes_0 = const()[name = string("input_75_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_self_weight_to_fp16 = const()[name = string("dec_layers_5_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38063424)))]; fp16 var_1905_to_fp16 = const()[name = string("op_1905_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_75_cast_fp16 = layer_norm(axes = input_75_axes_0, epsilon = var_1905_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = input_73_cast_fp16)[name = string("input_75_cast_fp16")]; tensor dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38065024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39834560))))[name = string("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor var_1919 = const()[name = string("op_1919"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_23_cast_fp16 = reshape(shape = var_1919, x = linear_20_cast_fp16)[name = string("qkv_23_cast_fp16")]; tensor q_11_begin_0 = const()[name = string("q_11_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_11_end_0 = const()[name = string("q_11_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_11_end_mask_0 = const()[name = string("q_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_11_squeeze_mask_0 = const()[name = string("q_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_11_cast_fp16 = slice_by_index(begin = q_11_begin_0, end = q_11_end_0, end_mask = q_11_end_mask_0, squeeze_mask = q_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("q_11_cast_fp16")]; tensor new_k_11_begin_0 = const()[name = string("new_k_11_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_11_end_0 = const()[name = string("new_k_11_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_11_end_mask_0 = const()[name = string("new_k_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_11_squeeze_mask_0 = const()[name = string("new_k_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("new_k_11_cast_fp16")]; tensor new_v_11_begin_0 = const()[name = string("new_v_11_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_11_end_0 = const()[name = string("new_v_11_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_11_end_mask_0 = const()[name = string("new_v_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_11_squeeze_mask_0 = const()[name = string("new_v_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = string("new_v_11_cast_fp16")]; tensor read_state_10 = read_state(input = sa_k_5)[name = string("read_state_10")]; tensor var_1988_cast_fp16 = sub(x = new_k_11_cast_fp16, y = read_state_10)[name = string("op_1988_cast_fp16")]; tensor var_1989_cast_fp16 = mul(x = var_1988_cast_fp16, y = write_oh_b_1)[name = string("op_1989_cast_fp16")]; tensor sa_k_buf_23_cast_fp16 = add(x = read_state_10, y = var_1989_cast_fp16)[name = string("sa_k_buf_23_cast_fp16")]; write_state(data = sa_k_buf_23_cast_fp16, input = sa_k_5)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = sa_k_5)[name = string("coreml_update_state_34")]; tensor read_state_11 = read_state(input = sa_v_5)[name = string("read_state_11")]; tensor var_1993_cast_fp16 = sub(x = new_v_11_cast_fp16, y = read_state_11)[name = string("op_1993_cast_fp16")]; tensor var_1994_cast_fp16 = mul(x = var_1993_cast_fp16, y = write_oh_b_1)[name = string("op_1994_cast_fp16")]; tensor sa_v_buf_23_cast_fp16 = add(x = read_state_11, y = var_1994_cast_fp16)[name = string("sa_v_buf_23_cast_fp16")]; write_state(data = sa_v_buf_23_cast_fp16, input = sa_v_5)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = sa_v_5)[name = string("coreml_update_state_35")]; tensor var_2023 = const()[name = string("op_2023"), val = tensor([0, 2, -3, -1])]; bool var_2025_transpose_x_0 = const()[name = string("op_2025_transpose_x_0"), val = bool(false)]; bool var_2025_transpose_y_0 = const()[name = string("op_2025_transpose_y_0"), val = bool(false)]; tensor transpose_116_perm_0 = const()[name = string("transpose_116_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_117_perm_0 = const()[name = string("transpose_117_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = coreml_update_state_34)[name = string("transpose_212")]; tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = q_11_cast_fp16)[name = string("transpose_213")]; tensor var_2025_cast_fp16 = matmul(transpose_x = var_2025_transpose_x_0, transpose_y = var_2025_transpose_y_0, x = transpose_116, y = transpose_117)[name = string("op_2025_cast_fp16")]; fp16 var_2026_to_fp16 = const()[name = string("op_2026_to_fp16"), val = fp16(0x1p-3)]; tensor scores_21_cast_fp16 = mul(x = var_2025_cast_fp16, y = var_2026_to_fp16)[name = string("scores_21_cast_fp16")]; fp16 var_2044_to_fp16 = const()[name = string("op_2044_to_fp16"), val = fp16(-inf)]; tensor scores_23_cast_fp16 = select(a = var_2044_to_fp16, b = scores_21_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_23_cast_fp16")]; int32 var_2046 = const()[name = string("op_2046"), val = int32(-1)]; tensor probs_11_cast_fp16 = softmax(axis = var_2046, x = scores_23_cast_fp16)[name = string("probs_11_cast_fp16")]; bool var_2049_transpose_x_0 = const()[name = string("op_2049_transpose_x_0"), val = bool(false)]; bool var_2049_transpose_y_0 = const()[name = string("op_2049_transpose_y_0"), val = bool(false)]; tensor v_t_11_cast_fp16 = transpose(perm = var_2023, x = coreml_update_state_35)[name = string("transpose_211")]; tensor var_2049_cast_fp16 = matmul(transpose_x = var_2049_transpose_x_0, transpose_y = var_2049_transpose_y_0, x = probs_11_cast_fp16, y = v_t_11_cast_fp16)[name = string("op_2049_cast_fp16")]; tensor var_2054 = const()[name = string("op_2054"), val = tensor([0, 2, 1, 3])]; tensor var_2059 = const()[name = string("op_2059"), val = tensor([1, 1, -1])]; tensor var_2055_cast_fp16 = transpose(perm = var_2054, x = var_2049_cast_fp16)[name = string("transpose_210")]; tensor input_77_cast_fp16 = reshape(shape = var_2059, x = var_2055_cast_fp16)[name = string("input_77_cast_fp16")]; tensor dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39839232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40429120))))[name = string("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor input_79_cast_fp16 = add(x = input_73_cast_fp16, y = linear_21_cast_fp16)[name = string("input_79_cast_fp16")]; tensor input_81_axes_0 = const()[name = string("input_81_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40430720)))]; fp16 var_2067_to_fp16 = const()[name = string("op_2067_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_2067_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_79_cast_fp16)[name = string("input_81_cast_fp16")]; tensor dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40432320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40530688))))[name = string("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor var_2080 = const()[name = string("op_2080"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_11_cast_fp16 = reshape(shape = var_2080, x = linear_22_cast_fp16)[name = string("xq_proj_11_cast_fp16")]; tensor var_2098 = const()[name = string("op_2098"), val = tensor([0, 2, -3, -1])]; string xa_v_5_to_fp16_dtype_0 = const()[name = string("xa_v_5_to_fp16_dtype_0"), val = string("fp16")]; bool var_2100_transpose_x_0 = const()[name = string("op_2100_transpose_x_0"), val = bool(false)]; bool var_2100_transpose_y_0 = const()[name = string("op_2100_transpose_y_0"), val = bool(false)]; string xa_k_5_to_fp16_dtype_0 = const()[name = string("xa_k_5_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_118_perm_0 = const()[name = string("transpose_118_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_119_perm_0 = const()[name = string("transpose_119_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_5_to_fp16 = cast(dtype = xa_k_5_to_fp16_dtype_0, x = xa_k_5)[name = string("cast_16")]; tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = xa_k_5_to_fp16)[name = string("transpose_208")]; tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = xq_proj_11_cast_fp16)[name = string("transpose_209")]; tensor var_2100_cast_fp16 = matmul(transpose_x = var_2100_transpose_x_0, transpose_y = var_2100_transpose_y_0, x = transpose_118, y = transpose_119)[name = string("op_2100_cast_fp16")]; fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_21_cast_fp16 = mul(x = var_2100_cast_fp16, y = var_2101_to_fp16)[name = string("xscores_21_cast_fp16")]; fp16 var_2119_to_fp16 = const()[name = string("op_2119_to_fp16"), val = fp16(-inf)]; tensor xscores_23_cast_fp16 = select(a = var_2119_to_fp16, b = xscores_21_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_23_cast_fp16")]; int32 var_2121 = const()[name = string("op_2121"), val = int32(-1)]; tensor xprobs_11_cast_fp16 = softmax(axis = var_2121, x = xscores_23_cast_fp16)[name = string("xprobs_11_cast_fp16")]; bool var_2124_transpose_x_0 = const()[name = string("op_2124_transpose_x_0"), val = bool(false)]; bool var_2124_transpose_y_0 = const()[name = string("op_2124_transpose_y_0"), val = bool(false)]; tensor xa_v_5_to_fp16 = cast(dtype = xa_v_5_to_fp16_dtype_0, x = xa_v_5)[name = string("cast_15")]; tensor xvT_11_cast_fp16 = transpose(perm = var_2098, x = xa_v_5_to_fp16)[name = string("transpose_207")]; tensor var_2124_cast_fp16 = matmul(transpose_x = var_2124_transpose_x_0, transpose_y = var_2124_transpose_y_0, x = xprobs_11_cast_fp16, y = xvT_11_cast_fp16)[name = string("op_2124_cast_fp16")]; tensor var_2129 = const()[name = string("op_2129"), val = tensor([0, 2, 1, 3])]; tensor var_2134 = const()[name = string("op_2134"), val = tensor([1, 1, -1])]; tensor var_2130_cast_fp16 = transpose(perm = var_2129, x = var_2124_cast_fp16)[name = string("transpose_206")]; tensor input_83_cast_fp16 = reshape(shape = var_2134, x = var_2130_cast_fp16)[name = string("input_83_cast_fp16")]; tensor dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40531008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40629376))))[name = string("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor input_85_cast_fp16 = add(x = input_79_cast_fp16, y = linear_23_cast_fp16)[name = string("input_85_cast_fp16")]; tensor x_41_axes_0 = const()[name = string("x_41_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40630976)))]; fp16 var_2142_to_fp16 = const()[name = string("op_2142_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_2142_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_85_cast_fp16)[name = string("x_41_cast_fp16")]; tensor var_2158 = const()[name = string("op_2158"), val = tensor([0, 2, 1])]; string y_21_pad_type_0 = const()[name = string("y_21_pad_type_0"), val = string("valid")]; tensor y_21_strides_0 = const()[name = string("y_21_strides_0"), val = tensor([1])]; tensor y_21_pad_0 = const()[name = string("y_21_pad_0"), val = tensor([0, 0])]; tensor y_21_dilations_0 = const()[name = string("y_21_dilations_0"), val = tensor([1])]; int32 y_21_groups_0 = const()[name = string("y_21_groups_0"), val = int32(1)]; tensor dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40632576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42991936))))[name = string("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_43_cast_fp16 = transpose(perm = var_2158, x = x_41_cast_fp16)[name = string("transpose_205")]; tensor y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = string("y_21_cast_fp16")]; string x_45_mode_0 = const()[name = string("x_45_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_21_cast_fp16)[name = string("x_45_cast_fp16")]; string y_23_pad_type_0 = const()[name = string("y_23_pad_type_0"), val = string("valid")]; tensor y_23_strides_0 = const()[name = string("y_23_strides_0"), val = tensor([1])]; tensor y_23_pad_0 = const()[name = string("y_23_pad_0"), val = tensor([0, 0])]; tensor y_23_dilations_0 = const()[name = string("y_23_dilations_0"), val = tensor([1])]; int32 y_23_groups_0 = const()[name = string("y_23_groups_0"), val = int32(1)]; tensor dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42998144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45357504))))[name = string("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = string("y_23_cast_fp16")]; tensor var_2176 = const()[name = string("op_2176"), val = tensor([0, 2, 1])]; tensor var_2177_cast_fp16 = transpose(perm = var_2176, x = y_23_cast_fp16)[name = string("transpose_204")]; tensor input_87_cast_fp16 = add(x = input_85_cast_fp16, y = var_2177_cast_fp16)[name = string("input_87_cast_fp16")]; tensor input_89_axes_0 = const()[name = string("input_89_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_self_weight_to_fp16 = const()[name = string("dec_layers_6_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45359104)))]; fp16 var_2181_to_fp16 = const()[name = string("op_2181_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_89_cast_fp16 = layer_norm(axes = input_89_axes_0, epsilon = var_2181_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")]; tensor dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45360704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47130240))))[name = string("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor var_2195 = const()[name = string("op_2195"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_27_cast_fp16 = reshape(shape = var_2195, x = linear_24_cast_fp16)[name = string("qkv_27_cast_fp16")]; tensor q_13_begin_0 = const()[name = string("q_13_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_13_end_0 = const()[name = string("q_13_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_13_end_mask_0 = const()[name = string("q_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_13_squeeze_mask_0 = const()[name = string("q_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("q_13_cast_fp16")]; tensor new_k_13_begin_0 = const()[name = string("new_k_13_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_13_end_0 = const()[name = string("new_k_13_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_13_end_mask_0 = const()[name = string("new_k_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_13_squeeze_mask_0 = const()[name = string("new_k_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("new_k_13_cast_fp16")]; tensor new_v_13_begin_0 = const()[name = string("new_v_13_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_13_end_0 = const()[name = string("new_v_13_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_13_end_mask_0 = const()[name = string("new_v_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_13_squeeze_mask_0 = const()[name = string("new_v_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = string("new_v_13_cast_fp16")]; tensor read_state_12 = read_state(input = sa_k_6)[name = string("read_state_12")]; tensor var_2264_cast_fp16 = sub(x = new_k_13_cast_fp16, y = read_state_12)[name = string("op_2264_cast_fp16")]; tensor var_2265_cast_fp16 = mul(x = var_2264_cast_fp16, y = write_oh_b_1)[name = string("op_2265_cast_fp16")]; tensor sa_k_buf_27_cast_fp16 = add(x = read_state_12, y = var_2265_cast_fp16)[name = string("sa_k_buf_27_cast_fp16")]; write_state(data = sa_k_buf_27_cast_fp16, input = sa_k_6)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = sa_k_6)[name = string("coreml_update_state_36")]; tensor read_state_13 = read_state(input = sa_v_6)[name = string("read_state_13")]; tensor var_2269_cast_fp16 = sub(x = new_v_13_cast_fp16, y = read_state_13)[name = string("op_2269_cast_fp16")]; tensor var_2270_cast_fp16 = mul(x = var_2269_cast_fp16, y = write_oh_b_1)[name = string("op_2270_cast_fp16")]; tensor sa_v_buf_27_cast_fp16 = add(x = read_state_13, y = var_2270_cast_fp16)[name = string("sa_v_buf_27_cast_fp16")]; write_state(data = sa_v_buf_27_cast_fp16, input = sa_v_6)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = sa_v_6)[name = string("coreml_update_state_37")]; tensor var_2299 = const()[name = string("op_2299"), val = tensor([0, 2, -3, -1])]; bool var_2301_transpose_x_0 = const()[name = string("op_2301_transpose_x_0"), val = bool(false)]; bool var_2301_transpose_y_0 = const()[name = string("op_2301_transpose_y_0"), val = bool(false)]; tensor transpose_120_perm_0 = const()[name = string("transpose_120_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_121_perm_0 = const()[name = string("transpose_121_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = coreml_update_state_36)[name = string("transpose_202")]; tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = q_13_cast_fp16)[name = string("transpose_203")]; tensor var_2301_cast_fp16 = matmul(transpose_x = var_2301_transpose_x_0, transpose_y = var_2301_transpose_y_0, x = transpose_120, y = transpose_121)[name = string("op_2301_cast_fp16")]; fp16 var_2302_to_fp16 = const()[name = string("op_2302_to_fp16"), val = fp16(0x1p-3)]; tensor scores_25_cast_fp16 = mul(x = var_2301_cast_fp16, y = var_2302_to_fp16)[name = string("scores_25_cast_fp16")]; fp16 var_2320_to_fp16 = const()[name = string("op_2320_to_fp16"), val = fp16(-inf)]; tensor scores_27_cast_fp16 = select(a = var_2320_to_fp16, b = scores_25_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_27_cast_fp16")]; int32 var_2322 = const()[name = string("op_2322"), val = int32(-1)]; tensor probs_13_cast_fp16 = softmax(axis = var_2322, x = scores_27_cast_fp16)[name = string("probs_13_cast_fp16")]; bool var_2325_transpose_x_0 = const()[name = string("op_2325_transpose_x_0"), val = bool(false)]; bool var_2325_transpose_y_0 = const()[name = string("op_2325_transpose_y_0"), val = bool(false)]; tensor v_t_13_cast_fp16 = transpose(perm = var_2299, x = coreml_update_state_37)[name = string("transpose_201")]; tensor var_2325_cast_fp16 = matmul(transpose_x = var_2325_transpose_x_0, transpose_y = var_2325_transpose_y_0, x = probs_13_cast_fp16, y = v_t_13_cast_fp16)[name = string("op_2325_cast_fp16")]; tensor var_2330 = const()[name = string("op_2330"), val = tensor([0, 2, 1, 3])]; tensor var_2335 = const()[name = string("op_2335"), val = tensor([1, 1, -1])]; tensor var_2331_cast_fp16 = transpose(perm = var_2330, x = var_2325_cast_fp16)[name = string("transpose_200")]; tensor input_91_cast_fp16 = reshape(shape = var_2335, x = var_2331_cast_fp16)[name = string("input_91_cast_fp16")]; tensor dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47134912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47724800))))[name = string("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor input_93_cast_fp16 = add(x = input_87_cast_fp16, y = linear_25_cast_fp16)[name = string("input_93_cast_fp16")]; tensor input_95_axes_0 = const()[name = string("input_95_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47726400)))]; fp16 var_2343_to_fp16 = const()[name = string("op_2343_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, epsilon = var_2343_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")]; tensor dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47728000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47826368))))[name = string("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_26_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor var_2356 = const()[name = string("op_2356"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_13_cast_fp16 = reshape(shape = var_2356, x = linear_26_cast_fp16)[name = string("xq_proj_13_cast_fp16")]; tensor var_2374 = const()[name = string("op_2374"), val = tensor([0, 2, -3, -1])]; string xa_v_6_to_fp16_dtype_0 = const()[name = string("xa_v_6_to_fp16_dtype_0"), val = string("fp16")]; bool var_2376_transpose_x_0 = const()[name = string("op_2376_transpose_x_0"), val = bool(false)]; bool var_2376_transpose_y_0 = const()[name = string("op_2376_transpose_y_0"), val = bool(false)]; string xa_k_6_to_fp16_dtype_0 = const()[name = string("xa_k_6_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_122_perm_0 = const()[name = string("transpose_122_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_123_perm_0 = const()[name = string("transpose_123_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_6_to_fp16 = cast(dtype = xa_k_6_to_fp16_dtype_0, x = xa_k_6)[name = string("cast_14")]; tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = xa_k_6_to_fp16)[name = string("transpose_198")]; tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = xq_proj_13_cast_fp16)[name = string("transpose_199")]; tensor var_2376_cast_fp16 = matmul(transpose_x = var_2376_transpose_x_0, transpose_y = var_2376_transpose_y_0, x = transpose_122, y = transpose_123)[name = string("op_2376_cast_fp16")]; fp16 var_2377_to_fp16 = const()[name = string("op_2377_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_25_cast_fp16 = mul(x = var_2376_cast_fp16, y = var_2377_to_fp16)[name = string("xscores_25_cast_fp16")]; fp16 var_2395_to_fp16 = const()[name = string("op_2395_to_fp16"), val = fp16(-inf)]; tensor xscores_27_cast_fp16 = select(a = var_2395_to_fp16, b = xscores_25_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_27_cast_fp16")]; int32 var_2397 = const()[name = string("op_2397"), val = int32(-1)]; tensor xprobs_13_cast_fp16 = softmax(axis = var_2397, x = xscores_27_cast_fp16)[name = string("xprobs_13_cast_fp16")]; bool var_2400_transpose_x_0 = const()[name = string("op_2400_transpose_x_0"), val = bool(false)]; bool var_2400_transpose_y_0 = const()[name = string("op_2400_transpose_y_0"), val = bool(false)]; tensor xa_v_6_to_fp16 = cast(dtype = xa_v_6_to_fp16_dtype_0, x = xa_v_6)[name = string("cast_13")]; tensor xvT_13_cast_fp16 = transpose(perm = var_2374, x = xa_v_6_to_fp16)[name = string("transpose_197")]; tensor var_2400_cast_fp16 = matmul(transpose_x = var_2400_transpose_x_0, transpose_y = var_2400_transpose_y_0, x = xprobs_13_cast_fp16, y = xvT_13_cast_fp16)[name = string("op_2400_cast_fp16")]; tensor var_2405 = const()[name = string("op_2405"), val = tensor([0, 2, 1, 3])]; tensor var_2410 = const()[name = string("op_2410"), val = tensor([1, 1, -1])]; tensor var_2406_cast_fp16 = transpose(perm = var_2405, x = var_2400_cast_fp16)[name = string("transpose_196")]; tensor input_97_cast_fp16 = reshape(shape = var_2410, x = var_2406_cast_fp16)[name = string("input_97_cast_fp16")]; tensor dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47826688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47925056))))[name = string("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor input_99_cast_fp16 = add(x = input_93_cast_fp16, y = linear_27_cast_fp16)[name = string("input_99_cast_fp16")]; tensor x_49_axes_0 = const()[name = string("x_49_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47926656)))]; fp16 var_2418_to_fp16 = const()[name = string("op_2418_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_49_cast_fp16 = layer_norm(axes = x_49_axes_0, epsilon = var_2418_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_99_cast_fp16)[name = string("x_49_cast_fp16")]; tensor var_2434 = const()[name = string("op_2434"), val = tensor([0, 2, 1])]; string y_25_pad_type_0 = const()[name = string("y_25_pad_type_0"), val = string("valid")]; tensor y_25_strides_0 = const()[name = string("y_25_strides_0"), val = tensor([1])]; tensor y_25_pad_0 = const()[name = string("y_25_pad_0"), val = tensor([0, 0])]; tensor y_25_dilations_0 = const()[name = string("y_25_dilations_0"), val = tensor([1])]; int32 y_25_groups_0 = const()[name = string("y_25_groups_0"), val = int32(1)]; tensor dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47928256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50287616))))[name = string("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_51_cast_fp16 = transpose(perm = var_2434, x = x_49_cast_fp16)[name = string("transpose_195")]; tensor y_25_cast_fp16 = conv(dilations = y_25_dilations_0, groups = y_25_groups_0, pad = y_25_pad_0, pad_type = y_25_pad_type_0, strides = y_25_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_51_cast_fp16)[name = string("y_25_cast_fp16")]; string x_53_mode_0 = const()[name = string("x_53_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = y_25_cast_fp16)[name = string("x_53_cast_fp16")]; string y_27_pad_type_0 = const()[name = string("y_27_pad_type_0"), val = string("valid")]; tensor y_27_strides_0 = const()[name = string("y_27_strides_0"), val = tensor([1])]; tensor y_27_pad_0 = const()[name = string("y_27_pad_0"), val = tensor([0, 0])]; tensor y_27_dilations_0 = const()[name = string("y_27_dilations_0"), val = tensor([1])]; int32 y_27_groups_0 = const()[name = string("y_27_groups_0"), val = int32(1)]; tensor dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50293824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52653184))))[name = string("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = string("y_27_cast_fp16")]; tensor var_2452 = const()[name = string("op_2452"), val = tensor([0, 2, 1])]; tensor var_2453_cast_fp16 = transpose(perm = var_2452, x = y_27_cast_fp16)[name = string("transpose_194")]; tensor input_101_cast_fp16 = add(x = input_99_cast_fp16, y = var_2453_cast_fp16)[name = string("input_101_cast_fp16")]; tensor input_103_axes_0 = const()[name = string("input_103_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_self_weight_to_fp16 = const()[name = string("dec_layers_7_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52654784)))]; fp16 var_2457_to_fp16 = const()[name = string("op_2457_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, epsilon = var_2457_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = input_101_cast_fp16)[name = string("input_103_cast_fp16")]; tensor dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52656384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54425920))))[name = string("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor var_2471 = const()[name = string("op_2471"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_31_cast_fp16 = reshape(shape = var_2471, x = linear_28_cast_fp16)[name = string("qkv_31_cast_fp16")]; tensor q_15_begin_0 = const()[name = string("q_15_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_15_end_0 = const()[name = string("q_15_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_15_end_mask_0 = const()[name = string("q_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_15_squeeze_mask_0 = const()[name = string("q_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_15_cast_fp16 = slice_by_index(begin = q_15_begin_0, end = q_15_end_0, end_mask = q_15_end_mask_0, squeeze_mask = q_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("q_15_cast_fp16")]; tensor new_k_15_begin_0 = const()[name = string("new_k_15_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_15_end_0 = const()[name = string("new_k_15_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_15_end_mask_0 = const()[name = string("new_k_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_15_squeeze_mask_0 = const()[name = string("new_k_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("new_k_15_cast_fp16")]; tensor new_v_15_begin_0 = const()[name = string("new_v_15_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_15_end_0 = const()[name = string("new_v_15_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_15_end_mask_0 = const()[name = string("new_v_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_15_squeeze_mask_0 = const()[name = string("new_v_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = string("new_v_15_cast_fp16")]; tensor read_state_14 = read_state(input = sa_k_7)[name = string("read_state_14")]; tensor var_2540_cast_fp16 = sub(x = new_k_15_cast_fp16, y = read_state_14)[name = string("op_2540_cast_fp16")]; tensor var_2541_cast_fp16 = mul(x = var_2540_cast_fp16, y = write_oh_b_1)[name = string("op_2541_cast_fp16")]; tensor sa_k_buf_31_cast_fp16 = add(x = read_state_14, y = var_2541_cast_fp16)[name = string("sa_k_buf_31_cast_fp16")]; write_state(data = sa_k_buf_31_cast_fp16, input = sa_k_7)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = sa_k_7)[name = string("coreml_update_state_38")]; tensor read_state_15 = read_state(input = sa_v_7)[name = string("read_state_15")]; tensor var_2545_cast_fp16 = sub(x = new_v_15_cast_fp16, y = read_state_15)[name = string("op_2545_cast_fp16")]; tensor var_2546_cast_fp16 = mul(x = var_2545_cast_fp16, y = write_oh_b_1)[name = string("op_2546_cast_fp16")]; tensor sa_v_buf_31_cast_fp16 = add(x = read_state_15, y = var_2546_cast_fp16)[name = string("sa_v_buf_31_cast_fp16")]; write_state(data = sa_v_buf_31_cast_fp16, input = sa_v_7)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = sa_v_7)[name = string("coreml_update_state_39")]; tensor var_2575 = const()[name = string("op_2575"), val = tensor([0, 2, -3, -1])]; bool var_2577_transpose_x_0 = const()[name = string("op_2577_transpose_x_0"), val = bool(false)]; bool var_2577_transpose_y_0 = const()[name = string("op_2577_transpose_y_0"), val = bool(false)]; tensor transpose_124_perm_0 = const()[name = string("transpose_124_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_125_perm_0 = const()[name = string("transpose_125_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = coreml_update_state_38)[name = string("transpose_192")]; tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = q_15_cast_fp16)[name = string("transpose_193")]; tensor var_2577_cast_fp16 = matmul(transpose_x = var_2577_transpose_x_0, transpose_y = var_2577_transpose_y_0, x = transpose_124, y = transpose_125)[name = string("op_2577_cast_fp16")]; fp16 var_2578_to_fp16 = const()[name = string("op_2578_to_fp16"), val = fp16(0x1p-3)]; tensor scores_29_cast_fp16 = mul(x = var_2577_cast_fp16, y = var_2578_to_fp16)[name = string("scores_29_cast_fp16")]; fp16 var_2596_to_fp16 = const()[name = string("op_2596_to_fp16"), val = fp16(-inf)]; tensor scores_31_cast_fp16 = select(a = var_2596_to_fp16, b = scores_29_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_31_cast_fp16")]; int32 var_2598 = const()[name = string("op_2598"), val = int32(-1)]; tensor probs_15_cast_fp16 = softmax(axis = var_2598, x = scores_31_cast_fp16)[name = string("probs_15_cast_fp16")]; bool var_2601_transpose_x_0 = const()[name = string("op_2601_transpose_x_0"), val = bool(false)]; bool var_2601_transpose_y_0 = const()[name = string("op_2601_transpose_y_0"), val = bool(false)]; tensor v_t_15_cast_fp16 = transpose(perm = var_2575, x = coreml_update_state_39)[name = string("transpose_191")]; tensor var_2601_cast_fp16 = matmul(transpose_x = var_2601_transpose_x_0, transpose_y = var_2601_transpose_y_0, x = probs_15_cast_fp16, y = v_t_15_cast_fp16)[name = string("op_2601_cast_fp16")]; tensor var_2606 = const()[name = string("op_2606"), val = tensor([0, 2, 1, 3])]; tensor var_2611 = const()[name = string("op_2611"), val = tensor([1, 1, -1])]; tensor var_2607_cast_fp16 = transpose(perm = var_2606, x = var_2601_cast_fp16)[name = string("transpose_190")]; tensor input_105_cast_fp16 = reshape(shape = var_2611, x = var_2607_cast_fp16)[name = string("input_105_cast_fp16")]; tensor dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54430592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55020480))))[name = string("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor input_107_cast_fp16 = add(x = input_101_cast_fp16, y = linear_29_cast_fp16)[name = string("input_107_cast_fp16")]; tensor input_109_axes_0 = const()[name = string("input_109_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55022080)))]; fp16 var_2619_to_fp16 = const()[name = string("op_2619_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, epsilon = var_2619_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")]; tensor dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55023680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55122048))))[name = string("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor var_2632 = const()[name = string("op_2632"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_15_cast_fp16 = reshape(shape = var_2632, x = linear_30_cast_fp16)[name = string("xq_proj_15_cast_fp16")]; tensor var_2650 = const()[name = string("op_2650"), val = tensor([0, 2, -3, -1])]; string xa_v_7_to_fp16_dtype_0 = const()[name = string("xa_v_7_to_fp16_dtype_0"), val = string("fp16")]; bool var_2652_transpose_x_0 = const()[name = string("op_2652_transpose_x_0"), val = bool(false)]; bool var_2652_transpose_y_0 = const()[name = string("op_2652_transpose_y_0"), val = bool(false)]; string xa_k_7_to_fp16_dtype_0 = const()[name = string("xa_k_7_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_126_perm_0 = const()[name = string("transpose_126_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_127_perm_0 = const()[name = string("transpose_127_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_7_to_fp16 = cast(dtype = xa_k_7_to_fp16_dtype_0, x = xa_k_7)[name = string("cast_12")]; tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = xa_k_7_to_fp16)[name = string("transpose_188")]; tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = xq_proj_15_cast_fp16)[name = string("transpose_189")]; tensor var_2652_cast_fp16 = matmul(transpose_x = var_2652_transpose_x_0, transpose_y = var_2652_transpose_y_0, x = transpose_126, y = transpose_127)[name = string("op_2652_cast_fp16")]; fp16 var_2653_to_fp16 = const()[name = string("op_2653_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_29_cast_fp16 = mul(x = var_2652_cast_fp16, y = var_2653_to_fp16)[name = string("xscores_29_cast_fp16")]; fp16 var_2671_to_fp16 = const()[name = string("op_2671_to_fp16"), val = fp16(-inf)]; tensor xscores_31_cast_fp16 = select(a = var_2671_to_fp16, b = xscores_29_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_31_cast_fp16")]; int32 var_2673 = const()[name = string("op_2673"), val = int32(-1)]; tensor xprobs_15_cast_fp16 = softmax(axis = var_2673, x = xscores_31_cast_fp16)[name = string("xprobs_15_cast_fp16")]; bool var_2676_transpose_x_0 = const()[name = string("op_2676_transpose_x_0"), val = bool(false)]; bool var_2676_transpose_y_0 = const()[name = string("op_2676_transpose_y_0"), val = bool(false)]; tensor xa_v_7_to_fp16 = cast(dtype = xa_v_7_to_fp16_dtype_0, x = xa_v_7)[name = string("cast_11")]; tensor xvT_15_cast_fp16 = transpose(perm = var_2650, x = xa_v_7_to_fp16)[name = string("transpose_187")]; tensor var_2676_cast_fp16 = matmul(transpose_x = var_2676_transpose_x_0, transpose_y = var_2676_transpose_y_0, x = xprobs_15_cast_fp16, y = xvT_15_cast_fp16)[name = string("op_2676_cast_fp16")]; tensor var_2681 = const()[name = string("op_2681"), val = tensor([0, 2, 1, 3])]; tensor var_2686 = const()[name = string("op_2686"), val = tensor([1, 1, -1])]; tensor var_2682_cast_fp16 = transpose(perm = var_2681, x = var_2676_cast_fp16)[name = string("transpose_186")]; tensor input_111_cast_fp16 = reshape(shape = var_2686, x = var_2682_cast_fp16)[name = string("input_111_cast_fp16")]; tensor dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55122368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55220736))))[name = string("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor input_113_cast_fp16 = add(x = input_107_cast_fp16, y = linear_31_cast_fp16)[name = string("input_113_cast_fp16")]; tensor x_57_axes_0 = const()[name = string("x_57_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55222336)))]; fp16 var_2694_to_fp16 = const()[name = string("op_2694_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_57_cast_fp16 = layer_norm(axes = x_57_axes_0, epsilon = var_2694_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = string("x_57_cast_fp16")]; tensor var_2710 = const()[name = string("op_2710"), val = tensor([0, 2, 1])]; string y_29_pad_type_0 = const()[name = string("y_29_pad_type_0"), val = string("valid")]; tensor y_29_strides_0 = const()[name = string("y_29_strides_0"), val = tensor([1])]; tensor y_29_pad_0 = const()[name = string("y_29_pad_0"), val = tensor([0, 0])]; tensor y_29_dilations_0 = const()[name = string("y_29_dilations_0"), val = tensor([1])]; int32 y_29_groups_0 = const()[name = string("y_29_groups_0"), val = int32(1)]; tensor dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55223936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57583296))))[name = string("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_59_cast_fp16 = transpose(perm = var_2710, x = x_57_cast_fp16)[name = string("transpose_185")]; tensor y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = string("y_29_cast_fp16")]; string x_61_mode_0 = const()[name = string("x_61_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_29_cast_fp16)[name = string("x_61_cast_fp16")]; string y_31_pad_type_0 = const()[name = string("y_31_pad_type_0"), val = string("valid")]; tensor y_31_strides_0 = const()[name = string("y_31_strides_0"), val = tensor([1])]; tensor y_31_pad_0 = const()[name = string("y_31_pad_0"), val = tensor([0, 0])]; tensor y_31_dilations_0 = const()[name = string("y_31_dilations_0"), val = tensor([1])]; int32 y_31_groups_0 = const()[name = string("y_31_groups_0"), val = int32(1)]; tensor dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57589504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59948864))))[name = string("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_31_cast_fp16 = conv(dilations = y_31_dilations_0, groups = y_31_groups_0, pad = y_31_pad_0, pad_type = y_31_pad_type_0, strides = y_31_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = string("y_31_cast_fp16")]; tensor var_2728 = const()[name = string("op_2728"), val = tensor([0, 2, 1])]; tensor var_2729_cast_fp16 = transpose(perm = var_2728, x = y_31_cast_fp16)[name = string("transpose_184")]; tensor input_115_cast_fp16 = add(x = input_113_cast_fp16, y = var_2729_cast_fp16)[name = string("input_115_cast_fp16")]; tensor input_117_axes_0 = const()[name = string("input_117_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_self_weight_to_fp16 = const()[name = string("dec_layers_8_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59950464)))]; fp16 var_2733_to_fp16 = const()[name = string("op_2733_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_2733_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = input_115_cast_fp16)[name = string("input_117_cast_fp16")]; tensor dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59952064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61721600))))[name = string("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_2747 = const()[name = string("op_2747"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_35_cast_fp16 = reshape(shape = var_2747, x = linear_32_cast_fp16)[name = string("qkv_35_cast_fp16")]; tensor q_17_begin_0 = const()[name = string("q_17_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_17_end_0 = const()[name = string("q_17_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_17_end_mask_0 = const()[name = string("q_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_17_squeeze_mask_0 = const()[name = string("q_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("q_17_cast_fp16")]; tensor new_k_17_begin_0 = const()[name = string("new_k_17_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_17_end_0 = const()[name = string("new_k_17_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_17_end_mask_0 = const()[name = string("new_k_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_17_squeeze_mask_0 = const()[name = string("new_k_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("new_k_17_cast_fp16")]; tensor new_v_17_begin_0 = const()[name = string("new_v_17_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_17_end_0 = const()[name = string("new_v_17_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_17_end_mask_0 = const()[name = string("new_v_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_17_squeeze_mask_0 = const()[name = string("new_v_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = string("new_v_17_cast_fp16")]; tensor read_state_16 = read_state(input = sa_k_8)[name = string("read_state_16")]; tensor var_2816_cast_fp16 = sub(x = new_k_17_cast_fp16, y = read_state_16)[name = string("op_2816_cast_fp16")]; tensor var_2817_cast_fp16 = mul(x = var_2816_cast_fp16, y = write_oh_b_1)[name = string("op_2817_cast_fp16")]; tensor sa_k_buf_35_cast_fp16 = add(x = read_state_16, y = var_2817_cast_fp16)[name = string("sa_k_buf_35_cast_fp16")]; write_state(data = sa_k_buf_35_cast_fp16, input = sa_k_8)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = sa_k_8)[name = string("coreml_update_state_40")]; tensor read_state_17 = read_state(input = sa_v_8)[name = string("read_state_17")]; tensor var_2821_cast_fp16 = sub(x = new_v_17_cast_fp16, y = read_state_17)[name = string("op_2821_cast_fp16")]; tensor var_2822_cast_fp16 = mul(x = var_2821_cast_fp16, y = write_oh_b_1)[name = string("op_2822_cast_fp16")]; tensor sa_v_buf_35_cast_fp16 = add(x = read_state_17, y = var_2822_cast_fp16)[name = string("sa_v_buf_35_cast_fp16")]; write_state(data = sa_v_buf_35_cast_fp16, input = sa_v_8)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = sa_v_8)[name = string("coreml_update_state_41")]; tensor var_2851 = const()[name = string("op_2851"), val = tensor([0, 2, -3, -1])]; bool var_2853_transpose_x_0 = const()[name = string("op_2853_transpose_x_0"), val = bool(false)]; bool var_2853_transpose_y_0 = const()[name = string("op_2853_transpose_y_0"), val = bool(false)]; tensor transpose_128_perm_0 = const()[name = string("transpose_128_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_129_perm_0 = const()[name = string("transpose_129_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = coreml_update_state_40)[name = string("transpose_182")]; tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = q_17_cast_fp16)[name = string("transpose_183")]; tensor var_2853_cast_fp16 = matmul(transpose_x = var_2853_transpose_x_0, transpose_y = var_2853_transpose_y_0, x = transpose_128, y = transpose_129)[name = string("op_2853_cast_fp16")]; fp16 var_2854_to_fp16 = const()[name = string("op_2854_to_fp16"), val = fp16(0x1p-3)]; tensor scores_33_cast_fp16 = mul(x = var_2853_cast_fp16, y = var_2854_to_fp16)[name = string("scores_33_cast_fp16")]; fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(-inf)]; tensor scores_35_cast_fp16 = select(a = var_2872_to_fp16, b = scores_33_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_35_cast_fp16")]; int32 var_2874 = const()[name = string("op_2874"), val = int32(-1)]; tensor probs_17_cast_fp16 = softmax(axis = var_2874, x = scores_35_cast_fp16)[name = string("probs_17_cast_fp16")]; bool var_2877_transpose_x_0 = const()[name = string("op_2877_transpose_x_0"), val = bool(false)]; bool var_2877_transpose_y_0 = const()[name = string("op_2877_transpose_y_0"), val = bool(false)]; tensor v_t_17_cast_fp16 = transpose(perm = var_2851, x = coreml_update_state_41)[name = string("transpose_181")]; tensor var_2877_cast_fp16 = matmul(transpose_x = var_2877_transpose_x_0, transpose_y = var_2877_transpose_y_0, x = probs_17_cast_fp16, y = v_t_17_cast_fp16)[name = string("op_2877_cast_fp16")]; tensor var_2882 = const()[name = string("op_2882"), val = tensor([0, 2, 1, 3])]; tensor var_2887 = const()[name = string("op_2887"), val = tensor([1, 1, -1])]; tensor var_2883_cast_fp16 = transpose(perm = var_2882, x = var_2877_cast_fp16)[name = string("transpose_180")]; tensor input_119_cast_fp16 = reshape(shape = var_2887, x = var_2883_cast_fp16)[name = string("input_119_cast_fp16")]; tensor dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61726272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62316160))))[name = string("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_121_cast_fp16 = add(x = input_115_cast_fp16, y = linear_33_cast_fp16)[name = string("input_121_cast_fp16")]; tensor input_123_axes_0 = const()[name = string("input_123_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62317760)))]; fp16 var_2895_to_fp16 = const()[name = string("op_2895_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, epsilon = var_2895_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = string("input_123_cast_fp16")]; tensor dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62319360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62417728))))[name = string("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor var_2908 = const()[name = string("op_2908"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_17_cast_fp16 = reshape(shape = var_2908, x = linear_34_cast_fp16)[name = string("xq_proj_17_cast_fp16")]; tensor var_2926 = const()[name = string("op_2926"), val = tensor([0, 2, -3, -1])]; string xa_v_8_to_fp16_dtype_0 = const()[name = string("xa_v_8_to_fp16_dtype_0"), val = string("fp16")]; bool var_2928_transpose_x_0 = const()[name = string("op_2928_transpose_x_0"), val = bool(false)]; bool var_2928_transpose_y_0 = const()[name = string("op_2928_transpose_y_0"), val = bool(false)]; string xa_k_8_to_fp16_dtype_0 = const()[name = string("xa_k_8_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_130_perm_0 = const()[name = string("transpose_130_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_131_perm_0 = const()[name = string("transpose_131_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_8_to_fp16 = cast(dtype = xa_k_8_to_fp16_dtype_0, x = xa_k_8)[name = string("cast_10")]; tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = xa_k_8_to_fp16)[name = string("transpose_178")]; tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = xq_proj_17_cast_fp16)[name = string("transpose_179")]; tensor var_2928_cast_fp16 = matmul(transpose_x = var_2928_transpose_x_0, transpose_y = var_2928_transpose_y_0, x = transpose_130, y = transpose_131)[name = string("op_2928_cast_fp16")]; fp16 var_2929_to_fp16 = const()[name = string("op_2929_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_33_cast_fp16 = mul(x = var_2928_cast_fp16, y = var_2929_to_fp16)[name = string("xscores_33_cast_fp16")]; fp16 var_2947_to_fp16 = const()[name = string("op_2947_to_fp16"), val = fp16(-inf)]; tensor xscores_35_cast_fp16 = select(a = var_2947_to_fp16, b = xscores_33_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_35_cast_fp16")]; int32 var_2949 = const()[name = string("op_2949"), val = int32(-1)]; tensor xprobs_17_cast_fp16 = softmax(axis = var_2949, x = xscores_35_cast_fp16)[name = string("xprobs_17_cast_fp16")]; bool var_2952_transpose_x_0 = const()[name = string("op_2952_transpose_x_0"), val = bool(false)]; bool var_2952_transpose_y_0 = const()[name = string("op_2952_transpose_y_0"), val = bool(false)]; tensor xa_v_8_to_fp16 = cast(dtype = xa_v_8_to_fp16_dtype_0, x = xa_v_8)[name = string("cast_9")]; tensor xvT_17_cast_fp16 = transpose(perm = var_2926, x = xa_v_8_to_fp16)[name = string("transpose_177")]; tensor var_2952_cast_fp16 = matmul(transpose_x = var_2952_transpose_x_0, transpose_y = var_2952_transpose_y_0, x = xprobs_17_cast_fp16, y = xvT_17_cast_fp16)[name = string("op_2952_cast_fp16")]; tensor var_2957 = const()[name = string("op_2957"), val = tensor([0, 2, 1, 3])]; tensor var_2962 = const()[name = string("op_2962"), val = tensor([1, 1, -1])]; tensor var_2958_cast_fp16 = transpose(perm = var_2957, x = var_2952_cast_fp16)[name = string("transpose_176")]; tensor input_125_cast_fp16 = reshape(shape = var_2962, x = var_2958_cast_fp16)[name = string("input_125_cast_fp16")]; tensor dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62418048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62516416))))[name = string("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor input_127_cast_fp16 = add(x = input_121_cast_fp16, y = linear_35_cast_fp16)[name = string("input_127_cast_fp16")]; tensor x_65_axes_0 = const()[name = string("x_65_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62518016)))]; fp16 var_2970_to_fp16 = const()[name = string("op_2970_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_2970_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_127_cast_fp16)[name = string("x_65_cast_fp16")]; tensor var_2986 = const()[name = string("op_2986"), val = tensor([0, 2, 1])]; string y_33_pad_type_0 = const()[name = string("y_33_pad_type_0"), val = string("valid")]; tensor y_33_strides_0 = const()[name = string("y_33_strides_0"), val = tensor([1])]; tensor y_33_pad_0 = const()[name = string("y_33_pad_0"), val = tensor([0, 0])]; tensor y_33_dilations_0 = const()[name = string("y_33_dilations_0"), val = tensor([1])]; int32 y_33_groups_0 = const()[name = string("y_33_groups_0"), val = int32(1)]; tensor dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62519616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64878976))))[name = string("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_67_cast_fp16 = transpose(perm = var_2986, x = x_65_cast_fp16)[name = string("transpose_175")]; tensor y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = string("y_33_cast_fp16")]; string x_69_mode_0 = const()[name = string("x_69_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_69_cast_fp16 = gelu(mode = x_69_mode_0, x = y_33_cast_fp16)[name = string("x_69_cast_fp16")]; string y_35_pad_type_0 = const()[name = string("y_35_pad_type_0"), val = string("valid")]; tensor y_35_strides_0 = const()[name = string("y_35_strides_0"), val = tensor([1])]; tensor y_35_pad_0 = const()[name = string("y_35_pad_0"), val = tensor([0, 0])]; tensor y_35_dilations_0 = const()[name = string("y_35_dilations_0"), val = tensor([1])]; int32 y_35_groups_0 = const()[name = string("y_35_groups_0"), val = int32(1)]; tensor dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64885184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67244544))))[name = string("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = string("y_35_cast_fp16")]; tensor var_3004 = const()[name = string("op_3004"), val = tensor([0, 2, 1])]; tensor var_3005_cast_fp16 = transpose(perm = var_3004, x = y_35_cast_fp16)[name = string("transpose_174")]; tensor input_129_cast_fp16 = add(x = input_127_cast_fp16, y = var_3005_cast_fp16)[name = string("input_129_cast_fp16")]; tensor input_131_axes_0 = const()[name = string("input_131_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_self_weight_to_fp16 = const()[name = string("dec_layers_9_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67246144)))]; fp16 var_3009_to_fp16 = const()[name = string("op_3009_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, epsilon = var_3009_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = input_129_cast_fp16)[name = string("input_131_cast_fp16")]; tensor dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67247744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69017280))))[name = string("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor var_3023 = const()[name = string("op_3023"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_39_cast_fp16 = reshape(shape = var_3023, x = linear_36_cast_fp16)[name = string("qkv_39_cast_fp16")]; tensor q_19_begin_0 = const()[name = string("q_19_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_19_end_0 = const()[name = string("q_19_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_19_end_mask_0 = const()[name = string("q_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_19_squeeze_mask_0 = const()[name = string("q_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_19_cast_fp16 = slice_by_index(begin = q_19_begin_0, end = q_19_end_0, end_mask = q_19_end_mask_0, squeeze_mask = q_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("q_19_cast_fp16")]; tensor new_k_19_begin_0 = const()[name = string("new_k_19_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_19_end_0 = const()[name = string("new_k_19_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_19_end_mask_0 = const()[name = string("new_k_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_19_squeeze_mask_0 = const()[name = string("new_k_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("new_k_19_cast_fp16")]; tensor new_v_19_begin_0 = const()[name = string("new_v_19_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_19_end_0 = const()[name = string("new_v_19_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_19_end_mask_0 = const()[name = string("new_v_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_19_squeeze_mask_0 = const()[name = string("new_v_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = string("new_v_19_cast_fp16")]; tensor read_state_18 = read_state(input = sa_k_9)[name = string("read_state_18")]; tensor var_3092_cast_fp16 = sub(x = new_k_19_cast_fp16, y = read_state_18)[name = string("op_3092_cast_fp16")]; tensor var_3093_cast_fp16 = mul(x = var_3092_cast_fp16, y = write_oh_b_1)[name = string("op_3093_cast_fp16")]; tensor sa_k_buf_39_cast_fp16 = add(x = read_state_18, y = var_3093_cast_fp16)[name = string("sa_k_buf_39_cast_fp16")]; write_state(data = sa_k_buf_39_cast_fp16, input = sa_k_9)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = sa_k_9)[name = string("coreml_update_state_42")]; tensor read_state_19 = read_state(input = sa_v_9)[name = string("read_state_19")]; tensor var_3097_cast_fp16 = sub(x = new_v_19_cast_fp16, y = read_state_19)[name = string("op_3097_cast_fp16")]; tensor var_3098_cast_fp16 = mul(x = var_3097_cast_fp16, y = write_oh_b_1)[name = string("op_3098_cast_fp16")]; tensor sa_v_buf_39_cast_fp16 = add(x = read_state_19, y = var_3098_cast_fp16)[name = string("sa_v_buf_39_cast_fp16")]; write_state(data = sa_v_buf_39_cast_fp16, input = sa_v_9)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = sa_v_9)[name = string("coreml_update_state_43")]; tensor var_3127 = const()[name = string("op_3127"), val = tensor([0, 2, -3, -1])]; bool var_3129_transpose_x_0 = const()[name = string("op_3129_transpose_x_0"), val = bool(false)]; bool var_3129_transpose_y_0 = const()[name = string("op_3129_transpose_y_0"), val = bool(false)]; tensor transpose_132_perm_0 = const()[name = string("transpose_132_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_133_perm_0 = const()[name = string("transpose_133_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = coreml_update_state_42)[name = string("transpose_172")]; tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = q_19_cast_fp16)[name = string("transpose_173")]; tensor var_3129_cast_fp16 = matmul(transpose_x = var_3129_transpose_x_0, transpose_y = var_3129_transpose_y_0, x = transpose_132, y = transpose_133)[name = string("op_3129_cast_fp16")]; fp16 var_3130_to_fp16 = const()[name = string("op_3130_to_fp16"), val = fp16(0x1p-3)]; tensor scores_37_cast_fp16 = mul(x = var_3129_cast_fp16, y = var_3130_to_fp16)[name = string("scores_37_cast_fp16")]; fp16 var_3148_to_fp16 = const()[name = string("op_3148_to_fp16"), val = fp16(-inf)]; tensor scores_39_cast_fp16 = select(a = var_3148_to_fp16, b = scores_37_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_39_cast_fp16")]; int32 var_3150 = const()[name = string("op_3150"), val = int32(-1)]; tensor probs_19_cast_fp16 = softmax(axis = var_3150, x = scores_39_cast_fp16)[name = string("probs_19_cast_fp16")]; bool var_3153_transpose_x_0 = const()[name = string("op_3153_transpose_x_0"), val = bool(false)]; bool var_3153_transpose_y_0 = const()[name = string("op_3153_transpose_y_0"), val = bool(false)]; tensor v_t_19_cast_fp16 = transpose(perm = var_3127, x = coreml_update_state_43)[name = string("transpose_171")]; tensor var_3153_cast_fp16 = matmul(transpose_x = var_3153_transpose_x_0, transpose_y = var_3153_transpose_y_0, x = probs_19_cast_fp16, y = v_t_19_cast_fp16)[name = string("op_3153_cast_fp16")]; tensor var_3158 = const()[name = string("op_3158"), val = tensor([0, 2, 1, 3])]; tensor var_3163 = const()[name = string("op_3163"), val = tensor([1, 1, -1])]; tensor var_3159_cast_fp16 = transpose(perm = var_3158, x = var_3153_cast_fp16)[name = string("transpose_170")]; tensor input_133_cast_fp16 = reshape(shape = var_3163, x = var_3159_cast_fp16)[name = string("input_133_cast_fp16")]; tensor dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69021952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69611840))))[name = string("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_37_cast_fp16)[name = string("input_135_cast_fp16")]; tensor input_137_axes_0 = const()[name = string("input_137_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69613440)))]; fp16 var_3171_to_fp16 = const()[name = string("op_3171_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, epsilon = var_3171_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_135_cast_fp16)[name = string("input_137_cast_fp16")]; tensor dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69615040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69713408))))[name = string("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor var_3184 = const()[name = string("op_3184"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_19_cast_fp16 = reshape(shape = var_3184, x = linear_38_cast_fp16)[name = string("xq_proj_19_cast_fp16")]; tensor var_3202 = const()[name = string("op_3202"), val = tensor([0, 2, -3, -1])]; string xa_v_9_to_fp16_dtype_0 = const()[name = string("xa_v_9_to_fp16_dtype_0"), val = string("fp16")]; bool var_3204_transpose_x_0 = const()[name = string("op_3204_transpose_x_0"), val = bool(false)]; bool var_3204_transpose_y_0 = const()[name = string("op_3204_transpose_y_0"), val = bool(false)]; string xa_k_9_to_fp16_dtype_0 = const()[name = string("xa_k_9_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_134_perm_0 = const()[name = string("transpose_134_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_135_perm_0 = const()[name = string("transpose_135_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_9_to_fp16 = cast(dtype = xa_k_9_to_fp16_dtype_0, x = xa_k_9)[name = string("cast_8")]; tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = xa_k_9_to_fp16)[name = string("transpose_168")]; tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = xq_proj_19_cast_fp16)[name = string("transpose_169")]; tensor var_3204_cast_fp16 = matmul(transpose_x = var_3204_transpose_x_0, transpose_y = var_3204_transpose_y_0, x = transpose_134, y = transpose_135)[name = string("op_3204_cast_fp16")]; fp16 var_3205_to_fp16 = const()[name = string("op_3205_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_37_cast_fp16 = mul(x = var_3204_cast_fp16, y = var_3205_to_fp16)[name = string("xscores_37_cast_fp16")]; fp16 var_3223_to_fp16 = const()[name = string("op_3223_to_fp16"), val = fp16(-inf)]; tensor xscores_39_cast_fp16 = select(a = var_3223_to_fp16, b = xscores_37_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_39_cast_fp16")]; int32 var_3225 = const()[name = string("op_3225"), val = int32(-1)]; tensor xprobs_19_cast_fp16 = softmax(axis = var_3225, x = xscores_39_cast_fp16)[name = string("xprobs_19_cast_fp16")]; bool var_3228_transpose_x_0 = const()[name = string("op_3228_transpose_x_0"), val = bool(false)]; bool var_3228_transpose_y_0 = const()[name = string("op_3228_transpose_y_0"), val = bool(false)]; tensor xa_v_9_to_fp16 = cast(dtype = xa_v_9_to_fp16_dtype_0, x = xa_v_9)[name = string("cast_7")]; tensor xvT_19_cast_fp16 = transpose(perm = var_3202, x = xa_v_9_to_fp16)[name = string("transpose_167")]; tensor var_3228_cast_fp16 = matmul(transpose_x = var_3228_transpose_x_0, transpose_y = var_3228_transpose_y_0, x = xprobs_19_cast_fp16, y = xvT_19_cast_fp16)[name = string("op_3228_cast_fp16")]; tensor var_3233 = const()[name = string("op_3233"), val = tensor([0, 2, 1, 3])]; tensor var_3238 = const()[name = string("op_3238"), val = tensor([1, 1, -1])]; tensor var_3234_cast_fp16 = transpose(perm = var_3233, x = var_3228_cast_fp16)[name = string("transpose_166")]; tensor input_139_cast_fp16 = reshape(shape = var_3238, x = var_3234_cast_fp16)[name = string("input_139_cast_fp16")]; tensor dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69713728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69812096))))[name = string("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor input_141_cast_fp16 = add(x = input_135_cast_fp16, y = linear_39_cast_fp16)[name = string("input_141_cast_fp16")]; tensor x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69813696)))]; fp16 var_3246_to_fp16 = const()[name = string("op_3246_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_3246_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_141_cast_fp16)[name = string("x_73_cast_fp16")]; tensor var_3262 = const()[name = string("op_3262"), val = tensor([0, 2, 1])]; string y_37_pad_type_0 = const()[name = string("y_37_pad_type_0"), val = string("valid")]; tensor y_37_strides_0 = const()[name = string("y_37_strides_0"), val = tensor([1])]; tensor y_37_pad_0 = const()[name = string("y_37_pad_0"), val = tensor([0, 0])]; tensor y_37_dilations_0 = const()[name = string("y_37_dilations_0"), val = tensor([1])]; int32 y_37_groups_0 = const()[name = string("y_37_groups_0"), val = int32(1)]; tensor dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69815296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72174656))))[name = string("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_75_cast_fp16 = transpose(perm = var_3262, x = x_73_cast_fp16)[name = string("transpose_165")]; tensor y_37_cast_fp16 = conv(dilations = y_37_dilations_0, groups = y_37_groups_0, pad = y_37_pad_0, pad_type = y_37_pad_type_0, strides = y_37_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = string("y_37_cast_fp16")]; string x_77_mode_0 = const()[name = string("x_77_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_37_cast_fp16)[name = string("x_77_cast_fp16")]; string y_39_pad_type_0 = const()[name = string("y_39_pad_type_0"), val = string("valid")]; tensor y_39_strides_0 = const()[name = string("y_39_strides_0"), val = tensor([1])]; tensor y_39_pad_0 = const()[name = string("y_39_pad_0"), val = tensor([0, 0])]; tensor y_39_dilations_0 = const()[name = string("y_39_dilations_0"), val = tensor([1])]; int32 y_39_groups_0 = const()[name = string("y_39_groups_0"), val = int32(1)]; tensor dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72180864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74540224))))[name = string("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = string("y_39_cast_fp16")]; tensor var_3280 = const()[name = string("op_3280"), val = tensor([0, 2, 1])]; tensor var_3281_cast_fp16 = transpose(perm = var_3280, x = y_39_cast_fp16)[name = string("transpose_164")]; tensor input_143_cast_fp16 = add(x = input_141_cast_fp16, y = var_3281_cast_fp16)[name = string("input_143_cast_fp16")]; tensor input_145_axes_0 = const()[name = string("input_145_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_self_weight_to_fp16 = const()[name = string("dec_layers_10_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74541824)))]; fp16 var_3285_to_fp16 = const()[name = string("op_3285_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, epsilon = var_3285_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = input_143_cast_fp16)[name = string("input_145_cast_fp16")]; tensor dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74543424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76312960))))[name = string("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor var_3299 = const()[name = string("op_3299"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_43_cast_fp16 = reshape(shape = var_3299, x = linear_40_cast_fp16)[name = string("qkv_43_cast_fp16")]; tensor q_21_begin_0 = const()[name = string("q_21_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_21_end_0 = const()[name = string("q_21_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_21_end_mask_0 = const()[name = string("q_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_21_squeeze_mask_0 = const()[name = string("q_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("q_21_cast_fp16")]; tensor new_k_21_begin_0 = const()[name = string("new_k_21_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_21_end_0 = const()[name = string("new_k_21_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_21_end_mask_0 = const()[name = string("new_k_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_21_squeeze_mask_0 = const()[name = string("new_k_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("new_k_21_cast_fp16")]; tensor new_v_21_begin_0 = const()[name = string("new_v_21_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_21_end_0 = const()[name = string("new_v_21_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_21_end_mask_0 = const()[name = string("new_v_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_21_squeeze_mask_0 = const()[name = string("new_v_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = string("new_v_21_cast_fp16")]; tensor read_state_20 = read_state(input = sa_k_10)[name = string("read_state_20")]; tensor var_3368_cast_fp16 = sub(x = new_k_21_cast_fp16, y = read_state_20)[name = string("op_3368_cast_fp16")]; tensor var_3369_cast_fp16 = mul(x = var_3368_cast_fp16, y = write_oh_b_1)[name = string("op_3369_cast_fp16")]; tensor sa_k_buf_43_cast_fp16 = add(x = read_state_20, y = var_3369_cast_fp16)[name = string("sa_k_buf_43_cast_fp16")]; write_state(data = sa_k_buf_43_cast_fp16, input = sa_k_10)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = sa_k_10)[name = string("coreml_update_state_44")]; tensor read_state_21 = read_state(input = sa_v_10)[name = string("read_state_21")]; tensor var_3373_cast_fp16 = sub(x = new_v_21_cast_fp16, y = read_state_21)[name = string("op_3373_cast_fp16")]; tensor var_3374_cast_fp16 = mul(x = var_3373_cast_fp16, y = write_oh_b_1)[name = string("op_3374_cast_fp16")]; tensor sa_v_buf_43_cast_fp16 = add(x = read_state_21, y = var_3374_cast_fp16)[name = string("sa_v_buf_43_cast_fp16")]; write_state(data = sa_v_buf_43_cast_fp16, input = sa_v_10)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = sa_v_10)[name = string("coreml_update_state_45")]; tensor var_3403 = const()[name = string("op_3403"), val = tensor([0, 2, -3, -1])]; bool var_3405_transpose_x_0 = const()[name = string("op_3405_transpose_x_0"), val = bool(false)]; bool var_3405_transpose_y_0 = const()[name = string("op_3405_transpose_y_0"), val = bool(false)]; tensor transpose_136_perm_0 = const()[name = string("transpose_136_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_137_perm_0 = const()[name = string("transpose_137_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = coreml_update_state_44)[name = string("transpose_162")]; tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = q_21_cast_fp16)[name = string("transpose_163")]; tensor var_3405_cast_fp16 = matmul(transpose_x = var_3405_transpose_x_0, transpose_y = var_3405_transpose_y_0, x = transpose_136, y = transpose_137)[name = string("op_3405_cast_fp16")]; fp16 var_3406_to_fp16 = const()[name = string("op_3406_to_fp16"), val = fp16(0x1p-3)]; tensor scores_41_cast_fp16 = mul(x = var_3405_cast_fp16, y = var_3406_to_fp16)[name = string("scores_41_cast_fp16")]; fp16 var_3424_to_fp16 = const()[name = string("op_3424_to_fp16"), val = fp16(-inf)]; tensor scores_43_cast_fp16 = select(a = var_3424_to_fp16, b = scores_41_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_43_cast_fp16")]; int32 var_3426 = const()[name = string("op_3426"), val = int32(-1)]; tensor probs_21_cast_fp16 = softmax(axis = var_3426, x = scores_43_cast_fp16)[name = string("probs_21_cast_fp16")]; bool var_3429_transpose_x_0 = const()[name = string("op_3429_transpose_x_0"), val = bool(false)]; bool var_3429_transpose_y_0 = const()[name = string("op_3429_transpose_y_0"), val = bool(false)]; tensor v_t_21_cast_fp16 = transpose(perm = var_3403, x = coreml_update_state_45)[name = string("transpose_161")]; tensor var_3429_cast_fp16 = matmul(transpose_x = var_3429_transpose_x_0, transpose_y = var_3429_transpose_y_0, x = probs_21_cast_fp16, y = v_t_21_cast_fp16)[name = string("op_3429_cast_fp16")]; tensor var_3434 = const()[name = string("op_3434"), val = tensor([0, 2, 1, 3])]; tensor var_3439 = const()[name = string("op_3439"), val = tensor([1, 1, -1])]; tensor var_3435_cast_fp16 = transpose(perm = var_3434, x = var_3429_cast_fp16)[name = string("transpose_160")]; tensor input_147_cast_fp16 = reshape(shape = var_3439, x = var_3435_cast_fp16)[name = string("input_147_cast_fp16")]; tensor dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76317632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76907520))))[name = string("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_41_cast_fp16)[name = string("input_149_cast_fp16")]; tensor input_151_axes_0 = const()[name = string("input_151_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76909120)))]; fp16 var_3447_to_fp16 = const()[name = string("op_3447_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_151_cast_fp16 = layer_norm(axes = input_151_axes_0, epsilon = var_3447_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_149_cast_fp16)[name = string("input_151_cast_fp16")]; tensor dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76910720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77009088))))[name = string("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor var_3460 = const()[name = string("op_3460"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_21_cast_fp16 = reshape(shape = var_3460, x = linear_42_cast_fp16)[name = string("xq_proj_21_cast_fp16")]; tensor var_3478 = const()[name = string("op_3478"), val = tensor([0, 2, -3, -1])]; string xa_v_10_to_fp16_dtype_0 = const()[name = string("xa_v_10_to_fp16_dtype_0"), val = string("fp16")]; bool var_3480_transpose_x_0 = const()[name = string("op_3480_transpose_x_0"), val = bool(false)]; bool var_3480_transpose_y_0 = const()[name = string("op_3480_transpose_y_0"), val = bool(false)]; string xa_k_10_to_fp16_dtype_0 = const()[name = string("xa_k_10_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_138_perm_0 = const()[name = string("transpose_138_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_139_perm_0 = const()[name = string("transpose_139_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_10_to_fp16 = cast(dtype = xa_k_10_to_fp16_dtype_0, x = xa_k_10)[name = string("cast_6")]; tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = xa_k_10_to_fp16)[name = string("transpose_158")]; tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = xq_proj_21_cast_fp16)[name = string("transpose_159")]; tensor var_3480_cast_fp16 = matmul(transpose_x = var_3480_transpose_x_0, transpose_y = var_3480_transpose_y_0, x = transpose_138, y = transpose_139)[name = string("op_3480_cast_fp16")]; fp16 var_3481_to_fp16 = const()[name = string("op_3481_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_41_cast_fp16 = mul(x = var_3480_cast_fp16, y = var_3481_to_fp16)[name = string("xscores_41_cast_fp16")]; fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(-inf)]; tensor xscores_43_cast_fp16 = select(a = var_3499_to_fp16, b = xscores_41_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_43_cast_fp16")]; int32 var_3501 = const()[name = string("op_3501"), val = int32(-1)]; tensor xprobs_21_cast_fp16 = softmax(axis = var_3501, x = xscores_43_cast_fp16)[name = string("xprobs_21_cast_fp16")]; bool var_3504_transpose_x_0 = const()[name = string("op_3504_transpose_x_0"), val = bool(false)]; bool var_3504_transpose_y_0 = const()[name = string("op_3504_transpose_y_0"), val = bool(false)]; tensor xa_v_10_to_fp16 = cast(dtype = xa_v_10_to_fp16_dtype_0, x = xa_v_10)[name = string("cast_5")]; tensor xvT_21_cast_fp16 = transpose(perm = var_3478, x = xa_v_10_to_fp16)[name = string("transpose_157")]; tensor var_3504_cast_fp16 = matmul(transpose_x = var_3504_transpose_x_0, transpose_y = var_3504_transpose_y_0, x = xprobs_21_cast_fp16, y = xvT_21_cast_fp16)[name = string("op_3504_cast_fp16")]; tensor var_3509 = const()[name = string("op_3509"), val = tensor([0, 2, 1, 3])]; tensor var_3514 = const()[name = string("op_3514"), val = tensor([1, 1, -1])]; tensor var_3510_cast_fp16 = transpose(perm = var_3509, x = var_3504_cast_fp16)[name = string("transpose_156")]; tensor input_153_cast_fp16 = reshape(shape = var_3514, x = var_3510_cast_fp16)[name = string("input_153_cast_fp16")]; tensor dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77009408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77107776))))[name = string("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_43_cast_fp16)[name = string("input_155_cast_fp16")]; tensor x_81_axes_0 = const()[name = string("x_81_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77109376)))]; fp16 var_3522_to_fp16 = const()[name = string("op_3522_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_3522_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_155_cast_fp16)[name = string("x_81_cast_fp16")]; tensor var_3538 = const()[name = string("op_3538"), val = tensor([0, 2, 1])]; string y_41_pad_type_0 = const()[name = string("y_41_pad_type_0"), val = string("valid")]; tensor y_41_strides_0 = const()[name = string("y_41_strides_0"), val = tensor([1])]; tensor y_41_pad_0 = const()[name = string("y_41_pad_0"), val = tensor([0, 0])]; tensor y_41_dilations_0 = const()[name = string("y_41_dilations_0"), val = tensor([1])]; int32 y_41_groups_0 = const()[name = string("y_41_groups_0"), val = int32(1)]; tensor dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77110976))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79470336))))[name = string("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_83_cast_fp16 = transpose(perm = var_3538, x = x_81_cast_fp16)[name = string("transpose_155")]; tensor y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_83_cast_fp16)[name = string("y_41_cast_fp16")]; string x_85_mode_0 = const()[name = string("x_85_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_85_cast_fp16 = gelu(mode = x_85_mode_0, x = y_41_cast_fp16)[name = string("x_85_cast_fp16")]; string y_43_pad_type_0 = const()[name = string("y_43_pad_type_0"), val = string("valid")]; tensor y_43_strides_0 = const()[name = string("y_43_strides_0"), val = tensor([1])]; tensor y_43_pad_0 = const()[name = string("y_43_pad_0"), val = tensor([0, 0])]; tensor y_43_dilations_0 = const()[name = string("y_43_dilations_0"), val = tensor([1])]; int32 y_43_groups_0 = const()[name = string("y_43_groups_0"), val = int32(1)]; tensor dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79476544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81835904))))[name = string("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_43_cast_fp16 = conv(dilations = y_43_dilations_0, groups = y_43_groups_0, pad = y_43_pad_0, pad_type = y_43_pad_type_0, strides = y_43_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = string("y_43_cast_fp16")]; tensor var_3556 = const()[name = string("op_3556"), val = tensor([0, 2, 1])]; tensor var_3557_cast_fp16 = transpose(perm = var_3556, x = y_43_cast_fp16)[name = string("transpose_154")]; tensor input_157_cast_fp16 = add(x = input_155_cast_fp16, y = var_3557_cast_fp16)[name = string("input_157_cast_fp16")]; tensor input_159_axes_0 = const()[name = string("input_159_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_self_weight_to_fp16 = const()[name = string("dec_layers_11_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81837504)))]; fp16 var_3561_to_fp16 = const()[name = string("op_3561_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, epsilon = var_3561_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = input_157_cast_fp16)[name = string("input_159_cast_fp16")]; tensor dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81839104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83608640))))[name = string("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized")]; tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor var_3575 = const()[name = string("op_3575"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_cast_fp16 = reshape(shape = var_3575, x = linear_44_cast_fp16)[name = string("qkv_cast_fp16")]; tensor q_begin_0 = const()[name = string("q_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_end_0 = const()[name = string("q_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_end_mask_0 = const()[name = string("q_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_squeeze_mask_0 = const()[name = string("q_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_cast_fp16 = slice_by_index(begin = q_begin_0, end = q_end_0, end_mask = q_end_mask_0, squeeze_mask = q_squeeze_mask_0, x = qkv_cast_fp16)[name = string("q_cast_fp16")]; tensor new_k_begin_0 = const()[name = string("new_k_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_end_0 = const()[name = string("new_k_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_end_mask_0 = const()[name = string("new_k_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_squeeze_mask_0 = const()[name = string("new_k_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = string("new_k_cast_fp16")]; tensor new_v_begin_0 = const()[name = string("new_v_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_end_0 = const()[name = string("new_v_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_end_mask_0 = const()[name = string("new_v_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_squeeze_mask_0 = const()[name = string("new_v_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = string("new_v_cast_fp16")]; tensor read_state_22 = read_state(input = sa_k_11)[name = string("read_state_22")]; tensor var_3644_cast_fp16 = sub(x = new_k_cast_fp16, y = read_state_22)[name = string("op_3644_cast_fp16")]; tensor var_3645_cast_fp16 = mul(x = var_3644_cast_fp16, y = write_oh_b_1)[name = string("op_3645_cast_fp16")]; tensor sa_k_buf_cast_fp16 = add(x = read_state_22, y = var_3645_cast_fp16)[name = string("sa_k_buf_cast_fp16")]; write_state(data = sa_k_buf_cast_fp16, input = sa_k_11)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = sa_k_11)[name = string("coreml_update_state_46")]; tensor read_state_23 = read_state(input = sa_v_11)[name = string("read_state_23")]; tensor var_3649_cast_fp16 = sub(x = new_v_cast_fp16, y = read_state_23)[name = string("op_3649_cast_fp16")]; tensor var_3650_cast_fp16 = mul(x = var_3649_cast_fp16, y = write_oh_b_1)[name = string("op_3650_cast_fp16")]; tensor sa_v_buf_cast_fp16 = add(x = read_state_23, y = var_3650_cast_fp16)[name = string("sa_v_buf_cast_fp16")]; write_state(data = sa_v_buf_cast_fp16, input = sa_v_11)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = sa_v_11)[name = string("coreml_update_state_47")]; tensor var_3679 = const()[name = string("op_3679"), val = tensor([0, 2, -3, -1])]; bool var_3681_transpose_x_0 = const()[name = string("op_3681_transpose_x_0"), val = bool(false)]; bool var_3681_transpose_y_0 = const()[name = string("op_3681_transpose_y_0"), val = bool(false)]; tensor transpose_140_perm_0 = const()[name = string("transpose_140_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_141_perm_0 = const()[name = string("transpose_141_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = coreml_update_state_46)[name = string("transpose_152")]; tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = q_cast_fp16)[name = string("transpose_153")]; tensor var_3681_cast_fp16 = matmul(transpose_x = var_3681_transpose_x_0, transpose_y = var_3681_transpose_y_0, x = transpose_140, y = transpose_141)[name = string("op_3681_cast_fp16")]; fp16 var_3682_to_fp16 = const()[name = string("op_3682_to_fp16"), val = fp16(0x1p-3)]; tensor scores_45_cast_fp16 = mul(x = var_3681_cast_fp16, y = var_3682_to_fp16)[name = string("scores_45_cast_fp16")]; fp16 var_3700_to_fp16 = const()[name = string("op_3700_to_fp16"), val = fp16(-inf)]; tensor scores_cast_fp16 = select(a = var_3700_to_fp16, b = scores_45_cast_fp16, cond = var_663_cast_fp16)[name = string("scores_cast_fp16")]; int32 var_3702 = const()[name = string("op_3702"), val = int32(-1)]; tensor probs_cast_fp16 = softmax(axis = var_3702, x = scores_cast_fp16)[name = string("probs_cast_fp16")]; bool var_3705_transpose_x_0 = const()[name = string("op_3705_transpose_x_0"), val = bool(false)]; bool var_3705_transpose_y_0 = const()[name = string("op_3705_transpose_y_0"), val = bool(false)]; tensor v_t_cast_fp16 = transpose(perm = var_3679, x = coreml_update_state_47)[name = string("transpose_151")]; tensor var_3705_cast_fp16 = matmul(transpose_x = var_3705_transpose_x_0, transpose_y = var_3705_transpose_y_0, x = probs_cast_fp16, y = v_t_cast_fp16)[name = string("op_3705_cast_fp16")]; tensor var_3710 = const()[name = string("op_3710"), val = tensor([0, 2, 1, 3])]; tensor var_3715 = const()[name = string("op_3715"), val = tensor([1, 1, -1])]; tensor var_3711_cast_fp16 = transpose(perm = var_3710, x = var_3705_cast_fp16)[name = string("transpose_150")]; tensor input_161_cast_fp16 = reshape(shape = var_3715, x = var_3711_cast_fp16)[name = string("input_161_cast_fp16")]; tensor dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83613312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84203200))))[name = string("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized")]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor input_163_cast_fp16 = add(x = input_157_cast_fp16, y = linear_45_cast_fp16)[name = string("input_163_cast_fp16")]; tensor input_165_axes_0 = const()[name = string("input_165_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = string("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84204800)))]; fp16 var_3723_to_fp16 = const()[name = string("op_3723_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_165_cast_fp16 = layer_norm(axes = input_165_axes_0, epsilon = var_3723_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_163_cast_fp16)[name = string("input_165_cast_fp16")]; tensor dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84206400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84304768))))[name = string("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized")]; tensor linear_46_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_3736 = const()[name = string("op_3736"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_cast_fp16 = reshape(shape = var_3736, x = linear_46_cast_fp16)[name = string("xq_proj_cast_fp16")]; tensor var_3754 = const()[name = string("op_3754"), val = tensor([0, 2, -3, -1])]; string xa_v_11_to_fp16_dtype_0 = const()[name = string("xa_v_11_to_fp16_dtype_0"), val = string("fp16")]; bool var_3756_transpose_x_0 = const()[name = string("op_3756_transpose_x_0"), val = bool(false)]; bool var_3756_transpose_y_0 = const()[name = string("op_3756_transpose_y_0"), val = bool(false)]; string xa_k_11_to_fp16_dtype_0 = const()[name = string("xa_k_11_to_fp16_dtype_0"), val = string("fp16")]; tensor transpose_142_perm_0 = const()[name = string("transpose_142_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_143_perm_0 = const()[name = string("transpose_143_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_11_to_fp16 = cast(dtype = xa_k_11_to_fp16_dtype_0, x = xa_k_11)[name = string("cast_4")]; tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = xa_k_11_to_fp16)[name = string("transpose_148")]; tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = xq_proj_cast_fp16)[name = string("transpose_149")]; tensor var_3756_cast_fp16 = matmul(transpose_x = var_3756_transpose_x_0, transpose_y = var_3756_transpose_y_0, x = transpose_142, y = transpose_143)[name = string("op_3756_cast_fp16")]; fp16 var_3757_to_fp16 = const()[name = string("op_3757_to_fp16"), val = fp16(0x1.6ap-4)]; tensor xscores_45_cast_fp16 = mul(x = var_3756_cast_fp16, y = var_3757_to_fp16)[name = string("xscores_45_cast_fp16")]; fp16 var_3775_to_fp16 = const()[name = string("op_3775_to_fp16"), val = fp16(-inf)]; tensor xscores_cast_fp16 = select(a = var_3775_to_fp16, b = xscores_45_cast_fp16, cond = var_738_cast_fp16)[name = string("xscores_cast_fp16")]; int32 var_3777 = const()[name = string("op_3777"), val = int32(-1)]; tensor xprobs_cast_fp16 = softmax(axis = var_3777, x = xscores_cast_fp16)[name = string("xprobs_cast_fp16")]; bool var_3780_transpose_x_0 = const()[name = string("op_3780_transpose_x_0"), val = bool(false)]; bool var_3780_transpose_y_0 = const()[name = string("op_3780_transpose_y_0"), val = bool(false)]; tensor xa_v_11_to_fp16 = cast(dtype = xa_v_11_to_fp16_dtype_0, x = xa_v_11)[name = string("cast_3")]; tensor xvT_cast_fp16 = transpose(perm = var_3754, x = xa_v_11_to_fp16)[name = string("transpose_147")]; tensor var_3780_cast_fp16 = matmul(transpose_x = var_3780_transpose_x_0, transpose_y = var_3780_transpose_y_0, x = xprobs_cast_fp16, y = xvT_cast_fp16)[name = string("op_3780_cast_fp16")]; tensor var_3785 = const()[name = string("op_3785"), val = tensor([0, 2, 1, 3])]; tensor var_3790 = const()[name = string("op_3790"), val = tensor([1, 1, -1])]; tensor var_3786_cast_fp16 = transpose(perm = var_3785, x = var_3780_cast_fp16)[name = string("transpose_146")]; tensor input_167_cast_fp16 = reshape(shape = var_3790, x = var_3786_cast_fp16)[name = string("input_167_cast_fp16")]; tensor dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84305088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84403456))))[name = string("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized")]; tensor linear_47_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_47_cast_fp16)[name = string("input_169_cast_fp16")]; tensor x_89_axes_0 = const()[name = string("x_89_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = string("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84405056)))]; fp16 var_3798_to_fp16 = const()[name = string("op_3798_to_fp16"), val = fp16(0x1.5p-17)]; tensor x_89_cast_fp16 = layer_norm(axes = x_89_axes_0, epsilon = var_3798_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_169_cast_fp16)[name = string("x_89_cast_fp16")]; tensor var_3814 = const()[name = string("op_3814"), val = tensor([0, 2, 1])]; string y_45_pad_type_0 = const()[name = string("y_45_pad_type_0"), val = string("valid")]; tensor y_45_strides_0 = const()[name = string("y_45_strides_0"), val = tensor([1])]; tensor y_45_pad_0 = const()[name = string("y_45_pad_0"), val = tensor([0, 0])]; tensor y_45_dilations_0 = const()[name = string("y_45_dilations_0"), val = tensor([1])]; int32 y_45_groups_0 = const()[name = string("y_45_groups_0"), val = int32(1)]; tensor dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84406656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86766016))))[name = string("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized")]; tensor x_91_cast_fp16 = transpose(perm = var_3814, x = x_89_cast_fp16)[name = string("transpose_145")]; tensor y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = string("y_45_cast_fp16")]; string x_93_mode_0 = const()[name = string("x_93_mode_0"), val = string("TANH_APPROXIMATION")]; tensor x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_45_cast_fp16)[name = string("x_93_cast_fp16")]; string y_pad_type_0 = const()[name = string("y_pad_type_0"), val = string("valid")]; tensor y_strides_0 = const()[name = string("y_strides_0"), val = tensor([1])]; tensor y_pad_0 = const()[name = string("y_pad_0"), val = tensor([0, 0])]; tensor y_dilations_0 = const()[name = string("y_dilations_0"), val = tensor([1])]; int32 y_groups_0 = const()[name = string("y_groups_0"), val = int32(1)]; tensor dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86772224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89131584))))[name = string("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized")]; tensor y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = string("y_cast_fp16")]; tensor var_3832 = const()[name = string("op_3832"), val = tensor([0, 2, 1])]; tensor var_3833_cast_fp16 = transpose(perm = var_3832, x = y_cast_fp16)[name = string("transpose_144")]; tensor input_171_cast_fp16 = add(x = input_169_cast_fp16, y = var_3833_cast_fp16)[name = string("input_171_cast_fp16")]; tensor input_axes_0 = const()[name = string("input_axes_0"), val = tensor([-1])]; tensor dec_norm_out_weight_to_fp16 = const()[name = string("dec_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89133184)))]; fp16 var_3837_to_fp16 = const()[name = string("op_3837_to_fp16"), val = fp16(0x1.5p-17)]; tensor input_cast_fp16 = layer_norm(axes = input_axes_0, epsilon = var_3837_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = input_171_cast_fp16)[name = string("input_cast_fp16")]; string input_cast_fp16_to_fp32_dtype_0 = const()[name = string("input_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; tensor dec_final_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89134784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101570304))))[name = string("dec_final_proj_weight_to_fp16_quantized")]; tensor dec_final_proj_bias_to_fp16 = const()[name = string("dec_final_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101602752)))]; tensor linear_48_cast_fp16 = linear(bias = dec_final_proj_bias_to_fp16, weight = dec_final_proj_weight_to_fp16_quantized, x = input_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor var_3850 = const()[name = string("op_3850"), val = tensor([1, 1, 8, 2024])]; tensor var_3851_cast_fp16 = reshape(shape = var_3850, x = linear_48_cast_fp16)[name = string("op_3851_cast_fp16")]; string var_3851_cast_fp16_to_fp32_dtype_0 = const()[name = string("op_3851_cast_fp16_to_fp32_dtype_0"), val = string("fp32")]; tensor h_last = cast(dtype = input_cast_fp16_to_fp32_dtype_0, x = input_cast_fp16)[name = string("cast_1")]; tensor logits = cast(dtype = var_3851_cast_fp16_to_fp32_dtype_0, x = var_3851_cast_fp16)[name = string("cast_2")]; tensor encoder_output_tmp = identity(x = encoder_output)[name = string("encoder_output_tmp")]; } -> (logits, h_last); }