program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor audio_emb, tensor encoder_mask, tensor encoder_output, tensor position, tensor sa_k_in_0, tensor sa_k_in_1, tensor sa_k_in_10, tensor sa_k_in_11, tensor sa_k_in_2, tensor sa_k_in_3, tensor sa_k_in_4, tensor sa_k_in_5, tensor sa_k_in_6, tensor sa_k_in_7, tensor sa_k_in_8, tensor sa_k_in_9, tensor sa_v_in_0, tensor sa_v_in_1, tensor sa_v_in_10, tensor sa_v_in_11, tensor sa_v_in_2, tensor sa_v_in_3, tensor sa_v_in_4, tensor sa_v_in_5, tensor sa_v_in_6, tensor sa_v_in_7, tensor sa_v_in_8, tensor sa_v_in_9, tensor xa_k_0, tensor xa_k_1, tensor xa_k_10, tensor xa_k_11, tensor xa_k_2, tensor xa_k_3, tensor xa_k_4, tensor xa_k_5, tensor xa_k_6, tensor xa_k_7, tensor xa_k_8, tensor xa_k_9, tensor xa_v_0, tensor xa_v_1, tensor xa_v_10, tensor xa_v_11, tensor xa_v_2, tensor xa_v_3, tensor xa_v_4, tensor xa_v_5, tensor xa_v_6, tensor xa_v_7, tensor xa_v_8, tensor xa_v_9) { tensor var_502_batch_dims_0 = const()[name = tensor("op_502_batch_dims_0"), val = tensor(0)]; tensor var_502_validate_indices_0 = const()[name = tensor("op_502_validate_indices_0"), val = tensor(false)]; tensor dec_position_embeddings_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_position_embeddings_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1575104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1572992)))]; tensor position_to_int16_dtype_0 = const()[name = tensor("position_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_63_dtype_0 = const()[name = tensor("cast_63_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0 = const()[name = tensor("greater_equal_0_y_0"), val = tensor(0)]; tensor position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = tensor("cast_83")]; tensor cast_63 = cast(dtype = cast_63_dtype_0, x = position_to_int16)[name = tensor("cast_82")]; tensor greater_equal_0 = greater_equal(x = cast_63, y = greater_equal_0_y_0)[name = tensor("greater_equal_0")]; tensor slice_by_index_0 = const()[name = tensor("slice_by_index_0"), val = tensor(2048)]; tensor add_0 = add(x = cast_63, y = slice_by_index_0)[name = tensor("add_0")]; tensor select_0 = select(a = cast_63, b = add_0, cond = greater_equal_0)[name = tensor("select_0")]; tensor select_0_to_int16_dtype_0 = const()[name = tensor("select_0_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_0_dtype_0 = const()[name = tensor("cast_0_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0_1 = const()[name = tensor("greater_equal_0_y_0_1"), val = tensor(0)]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor("cast_81")]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor("cast_80")]; tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor("greater_equal_0_1")]; tensor slice_by_index_0_1 = const()[name = tensor("slice_by_index_0_1"), val = tensor(2048)]; tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor("add_0_1")]; tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor("select_0_1")]; tensor op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor(0)]; tensor op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = tensor("op_502_cast_fp16_cast_uint16_cast_uint16")]; tensor audio_emb_to_fp16_dtype_0 = const()[name = tensor("audio_emb_to_fp16_dtype_0"), val = tensor("fp16")]; tensor audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = tensor("cast_79")]; tensor input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = tensor("input_3_cast_fp16")]; tensor idx_range_promoted_to_fp16 = const()[name = tensor("idx_range_promoted_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579264)))]; tensor var_515_to_fp16_dtype_0 = const()[name = tensor("op_515_to_fp16_dtype_0"), val = tensor("fp16")]; tensor position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = tensor("cast_78")]; tensor var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor("op_516_cast_fp16")]; tensor sa_key_mask_axes_0 = const()[name = tensor("sa_key_mask_axes_0"), val = tensor([0])]; tensor sa_key_mask_1_to_fp16_dtype_0 = const()[name = tensor("sa_key_mask_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = tensor("cast_77")]; tensor sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = tensor("sa_key_mask_cast_fp16")]; tensor input_5_axes_0 = const()[name = tensor("input_5_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1580544)))]; tensor var_525_to_fp16 = const()[name = tensor("op_525_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1582144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3354048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3358720)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor var_539 = const()[name = tensor("op_539"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = tensor("qkv_3_cast_fp16")]; tensor q_1_begin_0 = const()[name = tensor("q_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_1_end_0 = const()[name = tensor("q_1_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_1_end_mask_0 = const()[name = tensor("q_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_1_squeeze_mask_0 = const()[name = tensor("q_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("q_1_cast_fp16")]; tensor new_k_1_begin_0 = const()[name = tensor("new_k_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_1_end_0 = const()[name = tensor("new_k_1_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_1_end_mask_0 = const()[name = tensor("new_k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_1_squeeze_mask_0 = const()[name = tensor("new_k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("new_k_1_cast_fp16")]; tensor new_v_1_begin_0 = const()[name = tensor("new_v_1_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_1_end_0 = const()[name = tensor("new_v_1_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_1_end_mask_0 = const()[name = tensor("new_v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_1_squeeze_mask_0 = const()[name = tensor("new_v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("new_v_1_cast_fp16")]; tensor var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor("op_585_cast_fp16")]; tensor var_595 = const()[name = tensor("op_595"), val = tensor([1, 600, 1, 1])]; tensor write_oh_1_to_fp16_dtype_0 = const()[name = tensor("write_oh_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_585_cast_fp16_to_fp16 = cast(dtype = write_oh_1_to_fp16_dtype_0, x = var_585_cast_fp16)[name = tensor("cast_76")]; tensor write_oh_b_1_cast_fp16 = reshape(shape = var_595, x = var_585_cast_fp16_to_fp16)[name = tensor("write_oh_b_1_cast_fp16")]; tensor var_597_to_fp16 = const()[name = tensor("op_597_to_fp16"), val = tensor(0x1p+0)]; tensor var_599_cast_fp16 = sub(x = var_597_to_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_599_cast_fp16")]; tensor sa_k_in_0_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_0_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_0_to_fp16 = cast(dtype = sa_k_in_0_to_fp16_dtype_0, x = sa_k_in_0)[name = tensor("cast_75")]; tensor var_600_cast_fp16 = mul(x = sa_k_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor("op_600_cast_fp16")]; tensor var_601_cast_fp16 = mul(x = new_k_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_601_cast_fp16")]; tensor sa_k_out_1_cast_fp16 = add(x = var_600_cast_fp16, y = var_601_cast_fp16)[name = tensor("sa_k_out_1_cast_fp16")]; tensor sa_k_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_0_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_0_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_0_to_fp16 = cast(dtype = sa_v_in_0_to_fp16_dtype_0, x = sa_v_in_0)[name = tensor("cast_74")]; tensor var_607_cast_fp16 = mul(x = sa_v_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor("op_607_cast_fp16")]; tensor var_608_cast_fp16 = mul(x = new_v_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_608_cast_fp16")]; tensor sa_v_out_1_cast_fp16 = add(x = var_607_cast_fp16, y = var_608_cast_fp16)[name = tensor("sa_v_out_1_cast_fp16")]; tensor sa_v_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_627 = const()[name = tensor("op_627"), val = tensor([0, 2, -3, -1])]; tensor var_629_transpose_x_0 = const()[name = tensor("op_629_transpose_x_0"), val = tensor(false)]; tensor var_629_transpose_y_0 = const()[name = tensor("op_629_transpose_y_0"), val = tensor(false)]; tensor transpose_96_perm_0 = const()[name = tensor("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_97_perm_0 = const()[name = tensor("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = sa_k_out_1_cast_fp16)[name = tensor("transpose_262")]; tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = tensor("transpose_263")]; tensor var_629_cast_fp16 = matmul(transpose_x = var_629_transpose_x_0, transpose_y = var_629_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor("op_629_cast_fp16")]; tensor var_630_to_fp16 = const()[name = tensor("op_630_to_fp16"), val = tensor(0x1p-3)]; tensor scores_1_cast_fp16 = mul(x = var_629_cast_fp16, y = var_630_to_fp16)[name = tensor("scores_1_cast_fp16")]; tensor var_638_axes_0 = const()[name = tensor("op_638_axes_0"), val = tensor([1])]; tensor var_638_cast_fp16 = expand_dims(axes = var_638_axes_0, x = sa_key_mask_cast_fp16)[name = tensor("op_638_cast_fp16")]; tensor var_640_axes_0 = const()[name = tensor("op_640_axes_0"), val = tensor([2])]; tensor var_640_cast_fp16 = expand_dims(axes = var_640_axes_0, x = var_638_cast_fp16)[name = tensor("op_640_cast_fp16")]; tensor var_646_promoted_to_fp16 = const()[name = tensor("op_646_promoted_to_fp16"), val = tensor(0x0p+0)]; tensor var_647_cast_fp16 = equal(x = var_640_cast_fp16, y = var_646_promoted_to_fp16)[name = tensor("op_647_cast_fp16")]; tensor var_648_to_fp16 = const()[name = tensor("op_648_to_fp16"), val = tensor(-inf)]; tensor scores_3_cast_fp16 = select(a = var_648_to_fp16, b = scores_1_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_3_cast_fp16")]; tensor var_650 = const()[name = tensor("op_650"), val = tensor(-1)]; tensor probs_1_cast_fp16 = softmax(axis = var_650, x = scores_3_cast_fp16)[name = tensor("probs_1_cast_fp16")]; tensor var_653_transpose_x_0 = const()[name = tensor("op_653_transpose_x_0"), val = tensor(false)]; tensor var_653_transpose_y_0 = const()[name = tensor("op_653_transpose_y_0"), val = tensor(false)]; tensor v_t_1_cast_fp16 = transpose(perm = var_627, x = sa_v_out_1_cast_fp16)[name = tensor("transpose_261")]; tensor var_653_cast_fp16 = matmul(transpose_x = var_653_transpose_x_0, transpose_y = var_653_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = tensor("op_653_cast_fp16")]; tensor var_658 = const()[name = tensor("op_658"), val = tensor([0, 2, 1, 3])]; tensor var_663 = const()[name = tensor("op_663"), val = tensor([1, 1, -1])]; tensor var_659_cast_fp16 = transpose(perm = var_658, x = var_653_cast_fp16)[name = tensor("transpose_260")]; tensor input_7_cast_fp16 = reshape(shape = var_663, x = var_659_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3363392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3954112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3955712)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; tensor input_11_axes_0 = const()[name = tensor("input_11_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3957312)))]; tensor var_671_to_fp16 = const()[name = tensor("op_671_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_671_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3958912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057792)))]; tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_684 = const()[name = tensor("op_684"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_1_cast_fp16 = reshape(shape = var_684, x = linear_2_cast_fp16)[name = tensor("xq_proj_1_cast_fp16")]; tensor var_702 = const()[name = tensor("op_702"), val = tensor([0, 2, -3, -1])]; tensor xa_v_0_to_fp16_dtype_0 = const()[name = tensor("xa_v_0_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_704_transpose_x_0 = const()[name = tensor("op_704_transpose_x_0"), val = tensor(false)]; tensor var_704_transpose_y_0 = const()[name = tensor("op_704_transpose_y_0"), val = tensor(false)]; tensor xa_k_0_to_fp16_dtype_0 = const()[name = tensor("xa_k_0_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_98_perm_0 = const()[name = tensor("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_99_perm_0 = const()[name = tensor("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = tensor("cast_73")]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = tensor("transpose_258")]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = tensor("transpose_259")]; tensor var_704_cast_fp16 = matmul(transpose_x = var_704_transpose_x_0, transpose_y = var_704_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor("op_704_cast_fp16")]; tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_1_cast_fp16 = mul(x = var_704_cast_fp16, y = var_705_to_fp16)[name = tensor("xscores_1_cast_fp16")]; tensor var_713_axes_0 = const()[name = tensor("op_713_axes_0"), val = tensor([1])]; tensor encoder_mask_to_fp16_dtype_0 = const()[name = tensor("encoder_mask_to_fp16_dtype_0"), val = tensor("fp16")]; tensor encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = tensor("cast_72")]; tensor var_713_cast_fp16 = expand_dims(axes = var_713_axes_0, x = encoder_mask_to_fp16)[name = tensor("op_713_cast_fp16")]; tensor var_715_axes_0 = const()[name = tensor("op_715_axes_0"), val = tensor([2])]; tensor var_715_cast_fp16 = expand_dims(axes = var_715_axes_0, x = var_713_cast_fp16)[name = tensor("op_715_cast_fp16")]; tensor var_721_promoted_to_fp16 = const()[name = tensor("op_721_promoted_to_fp16"), val = tensor(0x0p+0)]; tensor var_722_cast_fp16 = equal(x = var_715_cast_fp16, y = var_721_promoted_to_fp16)[name = tensor("op_722_cast_fp16")]; tensor var_723_to_fp16 = const()[name = tensor("op_723_to_fp16"), val = tensor(-inf)]; tensor xscores_3_cast_fp16 = select(a = var_723_to_fp16, b = xscores_1_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_3_cast_fp16")]; tensor var_725 = const()[name = tensor("op_725"), val = tensor(-1)]; tensor xprobs_1_cast_fp16 = softmax(axis = var_725, x = xscores_3_cast_fp16)[name = tensor("xprobs_1_cast_fp16")]; tensor var_728_transpose_x_0 = const()[name = tensor("op_728_transpose_x_0"), val = tensor(false)]; tensor var_728_transpose_y_0 = const()[name = tensor("op_728_transpose_y_0"), val = tensor(false)]; tensor xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = tensor("cast_71")]; tensor xvT_1_cast_fp16 = transpose(perm = var_702, x = xa_v_0_to_fp16)[name = tensor("transpose_257")]; tensor var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = tensor("op_728_cast_fp16")]; tensor var_733 = const()[name = tensor("op_733"), val = tensor([0, 2, 1, 3])]; tensor var_738 = const()[name = tensor("op_738"), val = tensor([1, 1, -1])]; tensor var_734_cast_fp16 = transpose(perm = var_733, x = var_728_cast_fp16)[name = tensor("transpose_256")]; tensor input_13_cast_fp16 = reshape(shape = var_738, x = var_734_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4058112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4156480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor x_1_axes_0 = const()[name = tensor("x_1_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4158080)))]; tensor var_746_to_fp16 = const()[name = tensor("op_746_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_746_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_1_cast_fp16")]; tensor var_762 = const()[name = tensor("op_762"), val = tensor([0, 2, 1])]; tensor y_1_pad_type_0 = const()[name = tensor("y_1_pad_type_0"), val = tensor("valid")]; tensor y_1_strides_0 = const()[name = tensor("y_1_strides_0"), val = tensor([1])]; tensor y_1_pad_0 = const()[name = tensor("y_1_pad_0"), val = tensor([0, 0])]; tensor y_1_dilations_0 = const()[name = tensor("y_1_dilations_0"), val = tensor([1])]; tensor y_1_groups_0 = const()[name = tensor("y_1_groups_0"), val = tensor(1)]; tensor dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4159680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6522176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_3_cast_fp16 = transpose(perm = var_762, x = x_1_cast_fp16)[name = tensor("transpose_255")]; tensor y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = tensor("y_1_cast_fp16")]; tensor x_5_mode_0 = const()[name = tensor("x_5_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = tensor("x_5_cast_fp16")]; tensor y_3_pad_type_0 = const()[name = tensor("y_3_pad_type_0"), val = tensor("valid")]; tensor y_3_strides_0 = const()[name = tensor("y_3_strides_0"), val = tensor([1])]; tensor y_3_pad_0 = const()[name = tensor("y_3_pad_0"), val = tensor([0, 0])]; tensor y_3_dilations_0 = const()[name = tensor("y_3_dilations_0"), val = tensor([1])]; tensor y_3_groups_0 = const()[name = tensor("y_3_groups_0"), val = tensor(1)]; tensor dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6528384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8887744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = tensor("y_3_cast_fp16")]; tensor var_780 = const()[name = tensor("op_780"), val = tensor([0, 2, 1])]; tensor var_781_cast_fp16 = transpose(perm = var_780, x = y_3_cast_fp16)[name = tensor("transpose_254")]; tensor input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_781_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor input_19_axes_0 = const()[name = tensor("input_19_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8889344)))]; tensor var_785_to_fp16 = const()[name = tensor("op_785_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_785_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8890944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10660480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor var_799 = const()[name = tensor("op_799"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_7_cast_fp16 = reshape(shape = var_799, x = linear_4_cast_fp16)[name = tensor("qkv_7_cast_fp16")]; tensor q_3_begin_0 = const()[name = tensor("q_3_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_3_end_0 = const()[name = tensor("q_3_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_3_end_mask_0 = const()[name = tensor("q_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_3_squeeze_mask_0 = const()[name = tensor("q_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor new_k_3_begin_0 = const()[name = tensor("new_k_3_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_3_end_0 = const()[name = tensor("new_k_3_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_3_end_mask_0 = const()[name = tensor("new_k_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_3_squeeze_mask_0 = const()[name = tensor("new_k_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("new_k_3_cast_fp16")]; tensor new_v_3_begin_0 = const()[name = tensor("new_v_3_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_3_end_0 = const()[name = tensor("new_v_3_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_3_end_mask_0 = const()[name = tensor("new_v_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_3_squeeze_mask_0 = const()[name = tensor("new_v_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("new_v_3_cast_fp16")]; tensor sa_k_in_1_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_1_to_fp16 = cast(dtype = sa_k_in_1_to_fp16_dtype_0, x = sa_k_in_1)[name = tensor("cast_70")]; tensor var_860_cast_fp16 = mul(x = sa_k_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor("op_860_cast_fp16")]; tensor var_861_cast_fp16 = mul(x = new_k_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_861_cast_fp16")]; tensor sa_k_out_3_cast_fp16 = add(x = var_860_cast_fp16, y = var_861_cast_fp16)[name = tensor("sa_k_out_3_cast_fp16")]; tensor sa_k_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_1_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_1_to_fp16 = cast(dtype = sa_v_in_1_to_fp16_dtype_0, x = sa_v_in_1)[name = tensor("cast_69")]; tensor var_867_cast_fp16 = mul(x = sa_v_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor("op_867_cast_fp16")]; tensor var_868_cast_fp16 = mul(x = new_v_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_868_cast_fp16")]; tensor sa_v_out_3_cast_fp16 = add(x = var_867_cast_fp16, y = var_868_cast_fp16)[name = tensor("sa_v_out_3_cast_fp16")]; tensor sa_v_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_887 = const()[name = tensor("op_887"), val = tensor([0, 2, -3, -1])]; tensor var_889_transpose_x_0 = const()[name = tensor("op_889_transpose_x_0"), val = tensor(false)]; tensor var_889_transpose_y_0 = const()[name = tensor("op_889_transpose_y_0"), val = tensor(false)]; tensor transpose_100_perm_0 = const()[name = tensor("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_101_perm_0 = const()[name = tensor("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = sa_k_out_3_cast_fp16)[name = tensor("transpose_252")]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = tensor("transpose_253")]; tensor var_889_cast_fp16 = matmul(transpose_x = var_889_transpose_x_0, transpose_y = var_889_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor("op_889_cast_fp16")]; tensor var_890_to_fp16 = const()[name = tensor("op_890_to_fp16"), val = tensor(0x1p-3)]; tensor scores_5_cast_fp16 = mul(x = var_889_cast_fp16, y = var_890_to_fp16)[name = tensor("scores_5_cast_fp16")]; tensor var_908_to_fp16 = const()[name = tensor("op_908_to_fp16"), val = tensor(-inf)]; tensor scores_7_cast_fp16 = select(a = var_908_to_fp16, b = scores_5_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_7_cast_fp16")]; tensor var_910 = const()[name = tensor("op_910"), val = tensor(-1)]; tensor probs_3_cast_fp16 = softmax(axis = var_910, x = scores_7_cast_fp16)[name = tensor("probs_3_cast_fp16")]; tensor var_913_transpose_x_0 = const()[name = tensor("op_913_transpose_x_0"), val = tensor(false)]; tensor var_913_transpose_y_0 = const()[name = tensor("op_913_transpose_y_0"), val = tensor(false)]; tensor v_t_3_cast_fp16 = transpose(perm = var_887, x = sa_v_out_3_cast_fp16)[name = tensor("transpose_251")]; tensor var_913_cast_fp16 = matmul(transpose_x = var_913_transpose_x_0, transpose_y = var_913_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = tensor("op_913_cast_fp16")]; tensor var_918 = const()[name = tensor("op_918"), val = tensor([0, 2, 1, 3])]; tensor var_923 = const()[name = tensor("op_923"), val = tensor([1, 1, -1])]; tensor var_919_cast_fp16 = transpose(perm = var_918, x = var_913_cast_fp16)[name = tensor("transpose_250")]; tensor input_21_cast_fp16 = reshape(shape = var_923, x = var_919_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10665152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11255040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor input_25_axes_0 = const()[name = tensor("input_25_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11256640)))]; tensor var_931_to_fp16 = const()[name = tensor("op_931_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_931_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("input_25_cast_fp16")]; tensor dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11258240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11356608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = tensor("linear_6_cast_fp16")]; tensor var_944 = const()[name = tensor("op_944"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_3_cast_fp16 = reshape(shape = var_944, x = linear_6_cast_fp16)[name = tensor("xq_proj_3_cast_fp16")]; tensor var_962 = const()[name = tensor("op_962"), val = tensor([0, 2, -3, -1])]; tensor xa_v_1_to_fp16_dtype_0 = const()[name = tensor("xa_v_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_964_transpose_x_0 = const()[name = tensor("op_964_transpose_x_0"), val = tensor(false)]; tensor var_964_transpose_y_0 = const()[name = tensor("op_964_transpose_y_0"), val = tensor(false)]; tensor xa_k_1_to_fp16_dtype_0 = const()[name = tensor("xa_k_1_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_102_perm_0 = const()[name = tensor("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_103_perm_0 = const()[name = tensor("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = tensor("cast_68")]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = tensor("transpose_248")]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = tensor("transpose_249")]; tensor var_964_cast_fp16 = matmul(transpose_x = var_964_transpose_x_0, transpose_y = var_964_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor("op_964_cast_fp16")]; tensor var_965_to_fp16 = const()[name = tensor("op_965_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_5_cast_fp16 = mul(x = var_964_cast_fp16, y = var_965_to_fp16)[name = tensor("xscores_5_cast_fp16")]; tensor var_983_to_fp16 = const()[name = tensor("op_983_to_fp16"), val = tensor(-inf)]; tensor xscores_7_cast_fp16 = select(a = var_983_to_fp16, b = xscores_5_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_7_cast_fp16")]; tensor var_985 = const()[name = tensor("op_985"), val = tensor(-1)]; tensor xprobs_3_cast_fp16 = softmax(axis = var_985, x = xscores_7_cast_fp16)[name = tensor("xprobs_3_cast_fp16")]; tensor var_988_transpose_x_0 = const()[name = tensor("op_988_transpose_x_0"), val = tensor(false)]; tensor var_988_transpose_y_0 = const()[name = tensor("op_988_transpose_y_0"), val = tensor(false)]; tensor xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = tensor("cast_67")]; tensor xvT_3_cast_fp16 = transpose(perm = var_962, x = xa_v_1_to_fp16)[name = tensor("transpose_247")]; tensor var_988_cast_fp16 = matmul(transpose_x = var_988_transpose_x_0, transpose_y = var_988_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = tensor("op_988_cast_fp16")]; tensor var_993 = const()[name = tensor("op_993"), val = tensor([0, 2, 1, 3])]; tensor var_998 = const()[name = tensor("op_998"), val = tensor([1, 1, -1])]; tensor var_994_cast_fp16 = transpose(perm = var_993, x = var_988_cast_fp16)[name = tensor("transpose_246")]; tensor input_27_cast_fp16 = reshape(shape = var_998, x = var_994_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11356928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11455296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor x_9_axes_0 = const()[name = tensor("x_9_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11456896)))]; tensor var_1006_to_fp16 = const()[name = tensor("op_1006_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1006_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("x_9_cast_fp16")]; tensor var_1022 = const()[name = tensor("op_1022"), val = tensor([0, 2, 1])]; tensor y_5_pad_type_0 = const()[name = tensor("y_5_pad_type_0"), val = tensor("valid")]; tensor y_5_strides_0 = const()[name = tensor("y_5_strides_0"), val = tensor([1])]; tensor y_5_pad_0 = const()[name = tensor("y_5_pad_0"), val = tensor([0, 0])]; tensor y_5_dilations_0 = const()[name = tensor("y_5_dilations_0"), val = tensor([1])]; tensor y_5_groups_0 = const()[name = tensor("y_5_groups_0"), val = tensor(1)]; tensor dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11458496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13817856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_11_cast_fp16 = transpose(perm = var_1022, x = x_9_cast_fp16)[name = tensor("transpose_245")]; tensor y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor("y_5_cast_fp16")]; tensor x_13_mode_0 = const()[name = tensor("x_13_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor y_7_pad_type_0 = const()[name = tensor("y_7_pad_type_0"), val = tensor("valid")]; tensor y_7_strides_0 = const()[name = tensor("y_7_strides_0"), val = tensor([1])]; tensor y_7_pad_0 = const()[name = tensor("y_7_pad_0"), val = tensor([0, 0])]; tensor y_7_dilations_0 = const()[name = tensor("y_7_dilations_0"), val = tensor([1])]; tensor y_7_groups_0 = const()[name = tensor("y_7_groups_0"), val = tensor(1)]; tensor dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13824064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16183424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = tensor("y_7_cast_fp16")]; tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([0, 2, 1])]; tensor var_1041_cast_fp16 = transpose(perm = var_1040, x = y_7_cast_fp16)[name = tensor("transpose_244")]; tensor input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1041_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor input_33_axes_0 = const()[name = tensor("input_33_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16185024)))]; tensor var_1045_to_fp16 = const()[name = tensor("op_1045_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_1045_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("input_33_cast_fp16")]; tensor dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16186624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17956160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_11_cast_fp16 = reshape(shape = var_1059, x = linear_8_cast_fp16)[name = tensor("qkv_11_cast_fp16")]; tensor q_5_begin_0 = const()[name = tensor("q_5_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_5_end_0 = const()[name = tensor("q_5_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_5_end_mask_0 = const()[name = tensor("q_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_5_squeeze_mask_0 = const()[name = tensor("q_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor new_k_5_begin_0 = const()[name = tensor("new_k_5_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_5_end_0 = const()[name = tensor("new_k_5_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_5_end_mask_0 = const()[name = tensor("new_k_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_5_squeeze_mask_0 = const()[name = tensor("new_k_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("new_k_5_cast_fp16")]; tensor new_v_5_begin_0 = const()[name = tensor("new_v_5_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_5_end_0 = const()[name = tensor("new_v_5_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_5_end_mask_0 = const()[name = tensor("new_v_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_5_squeeze_mask_0 = const()[name = tensor("new_v_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("new_v_5_cast_fp16")]; tensor sa_k_in_2_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_2_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_2_to_fp16 = cast(dtype = sa_k_in_2_to_fp16_dtype_0, x = sa_k_in_2)[name = tensor("cast_66")]; tensor var_1120_cast_fp16 = mul(x = sa_k_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1120_cast_fp16")]; tensor var_1121_cast_fp16 = mul(x = new_k_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1121_cast_fp16")]; tensor sa_k_out_5_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1121_cast_fp16)[name = tensor("sa_k_out_5_cast_fp16")]; tensor sa_k_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_2_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_2_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_2_to_fp16 = cast(dtype = sa_v_in_2_to_fp16_dtype_0, x = sa_v_in_2)[name = tensor("cast_65")]; tensor var_1127_cast_fp16 = mul(x = sa_v_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1127_cast_fp16")]; tensor var_1128_cast_fp16 = mul(x = new_v_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1128_cast_fp16")]; tensor sa_v_out_5_cast_fp16 = add(x = var_1127_cast_fp16, y = var_1128_cast_fp16)[name = tensor("sa_v_out_5_cast_fp16")]; tensor sa_v_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1147 = const()[name = tensor("op_1147"), val = tensor([0, 2, -3, -1])]; tensor var_1149_transpose_x_0 = const()[name = tensor("op_1149_transpose_x_0"), val = tensor(false)]; tensor var_1149_transpose_y_0 = const()[name = tensor("op_1149_transpose_y_0"), val = tensor(false)]; tensor transpose_104_perm_0 = const()[name = tensor("transpose_104_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_105_perm_0 = const()[name = tensor("transpose_105_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = sa_k_out_5_cast_fp16)[name = tensor("transpose_242")]; tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = q_5_cast_fp16)[name = tensor("transpose_243")]; tensor var_1149_cast_fp16 = matmul(transpose_x = var_1149_transpose_x_0, transpose_y = var_1149_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor("op_1149_cast_fp16")]; tensor var_1150_to_fp16 = const()[name = tensor("op_1150_to_fp16"), val = tensor(0x1p-3)]; tensor scores_9_cast_fp16 = mul(x = var_1149_cast_fp16, y = var_1150_to_fp16)[name = tensor("scores_9_cast_fp16")]; tensor var_1168_to_fp16 = const()[name = tensor("op_1168_to_fp16"), val = tensor(-inf)]; tensor scores_11_cast_fp16 = select(a = var_1168_to_fp16, b = scores_9_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_11_cast_fp16")]; tensor var_1170 = const()[name = tensor("op_1170"), val = tensor(-1)]; tensor probs_5_cast_fp16 = softmax(axis = var_1170, x = scores_11_cast_fp16)[name = tensor("probs_5_cast_fp16")]; tensor var_1173_transpose_x_0 = const()[name = tensor("op_1173_transpose_x_0"), val = tensor(false)]; tensor var_1173_transpose_y_0 = const()[name = tensor("op_1173_transpose_y_0"), val = tensor(false)]; tensor v_t_5_cast_fp16 = transpose(perm = var_1147, x = sa_v_out_5_cast_fp16)[name = tensor("transpose_241")]; tensor var_1173_cast_fp16 = matmul(transpose_x = var_1173_transpose_x_0, transpose_y = var_1173_transpose_y_0, x = probs_5_cast_fp16, y = v_t_5_cast_fp16)[name = tensor("op_1173_cast_fp16")]; tensor var_1178 = const()[name = tensor("op_1178"), val = tensor([0, 2, 1, 3])]; tensor var_1183 = const()[name = tensor("op_1183"), val = tensor([1, 1, -1])]; tensor var_1179_cast_fp16 = transpose(perm = var_1178, x = var_1173_cast_fp16)[name = tensor("transpose_240")]; tensor input_35_cast_fp16 = reshape(shape = var_1183, x = var_1179_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17960832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18550720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor input_37_cast_fp16 = add(x = input_31_cast_fp16, y = linear_9_cast_fp16)[name = tensor("input_37_cast_fp16")]; tensor input_39_axes_0 = const()[name = tensor("input_39_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18552320)))]; tensor var_1191_to_fp16 = const()[name = tensor("op_1191_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_39_cast_fp16 = layer_norm(axes = input_39_axes_0, epsilon = var_1191_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18553920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18652288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_10_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = tensor("linear_10_cast_fp16")]; tensor var_1204 = const()[name = tensor("op_1204"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_5_cast_fp16 = reshape(shape = var_1204, x = linear_10_cast_fp16)[name = tensor("xq_proj_5_cast_fp16")]; tensor var_1222 = const()[name = tensor("op_1222"), val = tensor([0, 2, -3, -1])]; tensor xa_v_2_to_fp16_dtype_0 = const()[name = tensor("xa_v_2_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1224_transpose_x_0 = const()[name = tensor("op_1224_transpose_x_0"), val = tensor(false)]; tensor var_1224_transpose_y_0 = const()[name = tensor("op_1224_transpose_y_0"), val = tensor(false)]; tensor xa_k_2_to_fp16_dtype_0 = const()[name = tensor("xa_k_2_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_106_perm_0 = const()[name = tensor("transpose_106_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_107_perm_0 = const()[name = tensor("transpose_107_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_2_to_fp16 = cast(dtype = xa_k_2_to_fp16_dtype_0, x = xa_k_2)[name = tensor("cast_64")]; tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = xa_k_2_to_fp16)[name = tensor("transpose_238")]; tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = xq_proj_5_cast_fp16)[name = tensor("transpose_239")]; tensor var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor("op_1224_cast_fp16")]; tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_9_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("xscores_9_cast_fp16")]; tensor var_1243_to_fp16 = const()[name = tensor("op_1243_to_fp16"), val = tensor(-inf)]; tensor xscores_11_cast_fp16 = select(a = var_1243_to_fp16, b = xscores_9_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_11_cast_fp16")]; tensor var_1245 = const()[name = tensor("op_1245"), val = tensor(-1)]; tensor xprobs_5_cast_fp16 = softmax(axis = var_1245, x = xscores_11_cast_fp16)[name = tensor("xprobs_5_cast_fp16")]; tensor var_1248_transpose_x_0 = const()[name = tensor("op_1248_transpose_x_0"), val = tensor(false)]; tensor var_1248_transpose_y_0 = const()[name = tensor("op_1248_transpose_y_0"), val = tensor(false)]; tensor xa_v_2_to_fp16 = cast(dtype = xa_v_2_to_fp16_dtype_0, x = xa_v_2)[name = tensor("cast_63")]; tensor xvT_5_cast_fp16 = transpose(perm = var_1222, x = xa_v_2_to_fp16)[name = tensor("transpose_237")]; tensor var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = xprobs_5_cast_fp16, y = xvT_5_cast_fp16)[name = tensor("op_1248_cast_fp16")]; tensor var_1253 = const()[name = tensor("op_1253"), val = tensor([0, 2, 1, 3])]; tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 1, -1])]; tensor var_1254_cast_fp16 = transpose(perm = var_1253, x = var_1248_cast_fp16)[name = tensor("transpose_236")]; tensor input_41_cast_fp16 = reshape(shape = var_1258, x = var_1254_cast_fp16)[name = tensor("input_41_cast_fp16")]; tensor dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18652608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18750976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor input_43_cast_fp16 = add(x = input_37_cast_fp16, y = linear_11_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor x_17_axes_0 = const()[name = tensor("x_17_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18752576)))]; tensor var_1266_to_fp16 = const()[name = tensor("op_1266_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_1266_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("x_17_cast_fp16")]; tensor var_1282 = const()[name = tensor("op_1282"), val = tensor([0, 2, 1])]; tensor y_9_pad_type_0 = const()[name = tensor("y_9_pad_type_0"), val = tensor("valid")]; tensor y_9_strides_0 = const()[name = tensor("y_9_strides_0"), val = tensor([1])]; tensor y_9_pad_0 = const()[name = tensor("y_9_pad_0"), val = tensor([0, 0])]; tensor y_9_dilations_0 = const()[name = tensor("y_9_dilations_0"), val = tensor([1])]; tensor y_9_groups_0 = const()[name = tensor("y_9_groups_0"), val = tensor(1)]; tensor dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18754176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21113536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_19_cast_fp16 = transpose(perm = var_1282, x = x_17_cast_fp16)[name = tensor("transpose_235")]; tensor y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_19_cast_fp16)[name = tensor("y_9_cast_fp16")]; tensor x_21_mode_0 = const()[name = tensor("x_21_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_21_cast_fp16 = gelu(mode = x_21_mode_0, x = y_9_cast_fp16)[name = tensor("x_21_cast_fp16")]; tensor y_11_pad_type_0 = const()[name = tensor("y_11_pad_type_0"), val = tensor("valid")]; tensor y_11_strides_0 = const()[name = tensor("y_11_strides_0"), val = tensor([1])]; tensor y_11_pad_0 = const()[name = tensor("y_11_pad_0"), val = tensor([0, 0])]; tensor y_11_dilations_0 = const()[name = tensor("y_11_dilations_0"), val = tensor([1])]; tensor y_11_groups_0 = const()[name = tensor("y_11_groups_0"), val = tensor(1)]; tensor dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21119744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23479104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = tensor("y_11_cast_fp16")]; tensor var_1300 = const()[name = tensor("op_1300"), val = tensor([0, 2, 1])]; tensor var_1301_cast_fp16 = transpose(perm = var_1300, x = y_11_cast_fp16)[name = tensor("transpose_234")]; tensor input_45_cast_fp16 = add(x = input_43_cast_fp16, y = var_1301_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor input_47_axes_0 = const()[name = tensor("input_47_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23480704)))]; tensor var_1305_to_fp16 = const()[name = tensor("op_1305_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, epsilon = var_1305_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23482304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25251840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_1319 = const()[name = tensor("op_1319"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_15_cast_fp16 = reshape(shape = var_1319, x = linear_12_cast_fp16)[name = tensor("qkv_15_cast_fp16")]; tensor q_7_begin_0 = const()[name = tensor("q_7_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_7_end_0 = const()[name = tensor("q_7_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_7_end_mask_0 = const()[name = tensor("q_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_7_squeeze_mask_0 = const()[name = tensor("q_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_7_cast_fp16 = slice_by_index(begin = q_7_begin_0, end = q_7_end_0, end_mask = q_7_end_mask_0, squeeze_mask = q_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("q_7_cast_fp16")]; tensor new_k_7_begin_0 = const()[name = tensor("new_k_7_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_7_end_0 = const()[name = tensor("new_k_7_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_7_end_mask_0 = const()[name = tensor("new_k_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_7_squeeze_mask_0 = const()[name = tensor("new_k_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("new_k_7_cast_fp16")]; tensor new_v_7_begin_0 = const()[name = tensor("new_v_7_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_7_end_0 = const()[name = tensor("new_v_7_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_7_end_mask_0 = const()[name = tensor("new_v_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_7_squeeze_mask_0 = const()[name = tensor("new_v_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("new_v_7_cast_fp16")]; tensor sa_k_in_3_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_3_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_3_to_fp16 = cast(dtype = sa_k_in_3_to_fp16_dtype_0, x = sa_k_in_3)[name = tensor("cast_62")]; tensor var_1380_cast_fp16 = mul(x = sa_k_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1380_cast_fp16")]; tensor var_1381_cast_fp16 = mul(x = new_k_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1381_cast_fp16")]; tensor sa_k_out_7_cast_fp16 = add(x = var_1380_cast_fp16, y = var_1381_cast_fp16)[name = tensor("sa_k_out_7_cast_fp16")]; tensor sa_k_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_3_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_3_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_3_to_fp16 = cast(dtype = sa_v_in_3_to_fp16_dtype_0, x = sa_v_in_3)[name = tensor("cast_61")]; tensor var_1387_cast_fp16 = mul(x = sa_v_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1387_cast_fp16")]; tensor var_1388_cast_fp16 = mul(x = new_v_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1388_cast_fp16")]; tensor sa_v_out_7_cast_fp16 = add(x = var_1387_cast_fp16, y = var_1388_cast_fp16)[name = tensor("sa_v_out_7_cast_fp16")]; tensor sa_v_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1407 = const()[name = tensor("op_1407"), val = tensor([0, 2, -3, -1])]; tensor var_1409_transpose_x_0 = const()[name = tensor("op_1409_transpose_x_0"), val = tensor(false)]; tensor var_1409_transpose_y_0 = const()[name = tensor("op_1409_transpose_y_0"), val = tensor(false)]; tensor transpose_108_perm_0 = const()[name = tensor("transpose_108_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_109_perm_0 = const()[name = tensor("transpose_109_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = sa_k_out_7_cast_fp16)[name = tensor("transpose_232")]; tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = q_7_cast_fp16)[name = tensor("transpose_233")]; tensor var_1409_cast_fp16 = matmul(transpose_x = var_1409_transpose_x_0, transpose_y = var_1409_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor("op_1409_cast_fp16")]; tensor var_1410_to_fp16 = const()[name = tensor("op_1410_to_fp16"), val = tensor(0x1p-3)]; tensor scores_13_cast_fp16 = mul(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = tensor("scores_13_cast_fp16")]; tensor var_1428_to_fp16 = const()[name = tensor("op_1428_to_fp16"), val = tensor(-inf)]; tensor scores_15_cast_fp16 = select(a = var_1428_to_fp16, b = scores_13_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_15_cast_fp16")]; tensor var_1430 = const()[name = tensor("op_1430"), val = tensor(-1)]; tensor probs_7_cast_fp16 = softmax(axis = var_1430, x = scores_15_cast_fp16)[name = tensor("probs_7_cast_fp16")]; tensor var_1433_transpose_x_0 = const()[name = tensor("op_1433_transpose_x_0"), val = tensor(false)]; tensor var_1433_transpose_y_0 = const()[name = tensor("op_1433_transpose_y_0"), val = tensor(false)]; tensor v_t_7_cast_fp16 = transpose(perm = var_1407, x = sa_v_out_7_cast_fp16)[name = tensor("transpose_231")]; tensor var_1433_cast_fp16 = matmul(transpose_x = var_1433_transpose_x_0, transpose_y = var_1433_transpose_y_0, x = probs_7_cast_fp16, y = v_t_7_cast_fp16)[name = tensor("op_1433_cast_fp16")]; tensor var_1438 = const()[name = tensor("op_1438"), val = tensor([0, 2, 1, 3])]; tensor var_1443 = const()[name = tensor("op_1443"), val = tensor([1, 1, -1])]; tensor var_1439_cast_fp16 = transpose(perm = var_1438, x = var_1433_cast_fp16)[name = tensor("transpose_230")]; tensor input_49_cast_fp16 = reshape(shape = var_1443, x = var_1439_cast_fp16)[name = tensor("input_49_cast_fp16")]; tensor dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25256512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25846400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor input_51_cast_fp16 = add(x = input_45_cast_fp16, y = linear_13_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor input_53_axes_0 = const()[name = tensor("input_53_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25848000)))]; tensor var_1451_to_fp16 = const()[name = tensor("op_1451_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, epsilon = var_1451_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25849600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25947968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor var_1464 = const()[name = tensor("op_1464"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_7_cast_fp16 = reshape(shape = var_1464, x = linear_14_cast_fp16)[name = tensor("xq_proj_7_cast_fp16")]; tensor var_1482 = const()[name = tensor("op_1482"), val = tensor([0, 2, -3, -1])]; tensor xa_v_3_to_fp16_dtype_0 = const()[name = tensor("xa_v_3_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1484_transpose_x_0 = const()[name = tensor("op_1484_transpose_x_0"), val = tensor(false)]; tensor var_1484_transpose_y_0 = const()[name = tensor("op_1484_transpose_y_0"), val = tensor(false)]; tensor xa_k_3_to_fp16_dtype_0 = const()[name = tensor("xa_k_3_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_110_perm_0 = const()[name = tensor("transpose_110_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_111_perm_0 = const()[name = tensor("transpose_111_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_3_to_fp16 = cast(dtype = xa_k_3_to_fp16_dtype_0, x = xa_k_3)[name = tensor("cast_60")]; tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = xa_k_3_to_fp16)[name = tensor("transpose_228")]; tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = xq_proj_7_cast_fp16)[name = tensor("transpose_229")]; tensor var_1484_cast_fp16 = matmul(transpose_x = var_1484_transpose_x_0, transpose_y = var_1484_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor("op_1484_cast_fp16")]; tensor var_1485_to_fp16 = const()[name = tensor("op_1485_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_13_cast_fp16 = mul(x = var_1484_cast_fp16, y = var_1485_to_fp16)[name = tensor("xscores_13_cast_fp16")]; tensor var_1503_to_fp16 = const()[name = tensor("op_1503_to_fp16"), val = tensor(-inf)]; tensor xscores_15_cast_fp16 = select(a = var_1503_to_fp16, b = xscores_13_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_15_cast_fp16")]; tensor var_1505 = const()[name = tensor("op_1505"), val = tensor(-1)]; tensor xprobs_7_cast_fp16 = softmax(axis = var_1505, x = xscores_15_cast_fp16)[name = tensor("xprobs_7_cast_fp16")]; tensor var_1508_transpose_x_0 = const()[name = tensor("op_1508_transpose_x_0"), val = tensor(false)]; tensor var_1508_transpose_y_0 = const()[name = tensor("op_1508_transpose_y_0"), val = tensor(false)]; tensor xa_v_3_to_fp16 = cast(dtype = xa_v_3_to_fp16_dtype_0, x = xa_v_3)[name = tensor("cast_59")]; tensor xvT_7_cast_fp16 = transpose(perm = var_1482, x = xa_v_3_to_fp16)[name = tensor("transpose_227")]; tensor var_1508_cast_fp16 = matmul(transpose_x = var_1508_transpose_x_0, transpose_y = var_1508_transpose_y_0, x = xprobs_7_cast_fp16, y = xvT_7_cast_fp16)[name = tensor("op_1508_cast_fp16")]; tensor var_1513 = const()[name = tensor("op_1513"), val = tensor([0, 2, 1, 3])]; tensor var_1518 = const()[name = tensor("op_1518"), val = tensor([1, 1, -1])]; tensor var_1514_cast_fp16 = transpose(perm = var_1513, x = var_1508_cast_fp16)[name = tensor("transpose_226")]; tensor input_55_cast_fp16 = reshape(shape = var_1518, x = var_1514_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25948288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26046656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = tensor("linear_15_cast_fp16")]; tensor input_57_cast_fp16 = add(x = input_51_cast_fp16, y = linear_15_cast_fp16)[name = tensor("input_57_cast_fp16")]; tensor x_25_axes_0 = const()[name = tensor("x_25_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26048256)))]; tensor var_1526_to_fp16 = const()[name = tensor("op_1526_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_1526_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("x_25_cast_fp16")]; tensor var_1542 = const()[name = tensor("op_1542"), val = tensor([0, 2, 1])]; tensor y_13_pad_type_0 = const()[name = tensor("y_13_pad_type_0"), val = tensor("valid")]; tensor y_13_strides_0 = const()[name = tensor("y_13_strides_0"), val = tensor([1])]; tensor y_13_pad_0 = const()[name = tensor("y_13_pad_0"), val = tensor([0, 0])]; tensor y_13_dilations_0 = const()[name = tensor("y_13_dilations_0"), val = tensor([1])]; tensor y_13_groups_0 = const()[name = tensor("y_13_groups_0"), val = tensor(1)]; tensor dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26049856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28409216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_27_cast_fp16 = transpose(perm = var_1542, x = x_25_cast_fp16)[name = tensor("transpose_225")]; tensor y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = tensor("y_13_cast_fp16")]; tensor x_29_mode_0 = const()[name = tensor("x_29_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_13_cast_fp16)[name = tensor("x_29_cast_fp16")]; tensor y_15_pad_type_0 = const()[name = tensor("y_15_pad_type_0"), val = tensor("valid")]; tensor y_15_strides_0 = const()[name = tensor("y_15_strides_0"), val = tensor([1])]; tensor y_15_pad_0 = const()[name = tensor("y_15_pad_0"), val = tensor([0, 0])]; tensor y_15_dilations_0 = const()[name = tensor("y_15_dilations_0"), val = tensor([1])]; tensor y_15_groups_0 = const()[name = tensor("y_15_groups_0"), val = tensor(1)]; tensor dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28415424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30774784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = tensor("y_15_cast_fp16")]; tensor var_1560 = const()[name = tensor("op_1560"), val = tensor([0, 2, 1])]; tensor var_1561_cast_fp16 = transpose(perm = var_1560, x = y_15_cast_fp16)[name = tensor("transpose_224")]; tensor input_59_cast_fp16 = add(x = input_57_cast_fp16, y = var_1561_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor input_61_axes_0 = const()[name = tensor("input_61_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30776384)))]; tensor var_1565_to_fp16 = const()[name = tensor("op_1565_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, epsilon = var_1565_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30777984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32547520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor var_1579 = const()[name = tensor("op_1579"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_19_cast_fp16 = reshape(shape = var_1579, x = linear_16_cast_fp16)[name = tensor("qkv_19_cast_fp16")]; tensor q_9_begin_0 = const()[name = tensor("q_9_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_9_end_0 = const()[name = tensor("q_9_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_9_end_mask_0 = const()[name = tensor("q_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_9_squeeze_mask_0 = const()[name = tensor("q_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor new_k_9_begin_0 = const()[name = tensor("new_k_9_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_9_end_0 = const()[name = tensor("new_k_9_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_9_end_mask_0 = const()[name = tensor("new_k_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_9_squeeze_mask_0 = const()[name = tensor("new_k_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("new_k_9_cast_fp16")]; tensor new_v_9_begin_0 = const()[name = tensor("new_v_9_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_9_end_0 = const()[name = tensor("new_v_9_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_9_end_mask_0 = const()[name = tensor("new_v_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_9_squeeze_mask_0 = const()[name = tensor("new_v_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("new_v_9_cast_fp16")]; tensor sa_k_in_4_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_4_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_4_to_fp16 = cast(dtype = sa_k_in_4_to_fp16_dtype_0, x = sa_k_in_4)[name = tensor("cast_58")]; tensor var_1640_cast_fp16 = mul(x = sa_k_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1640_cast_fp16")]; tensor var_1641_cast_fp16 = mul(x = new_k_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1641_cast_fp16")]; tensor sa_k_out_9_cast_fp16 = add(x = var_1640_cast_fp16, y = var_1641_cast_fp16)[name = tensor("sa_k_out_9_cast_fp16")]; tensor sa_k_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_4_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_4_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_4_to_fp16 = cast(dtype = sa_v_in_4_to_fp16_dtype_0, x = sa_v_in_4)[name = tensor("cast_57")]; tensor var_1647_cast_fp16 = mul(x = sa_v_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1647_cast_fp16")]; tensor var_1648_cast_fp16 = mul(x = new_v_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1648_cast_fp16")]; tensor sa_v_out_9_cast_fp16 = add(x = var_1647_cast_fp16, y = var_1648_cast_fp16)[name = tensor("sa_v_out_9_cast_fp16")]; tensor sa_v_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1667 = const()[name = tensor("op_1667"), val = tensor([0, 2, -3, -1])]; tensor var_1669_transpose_x_0 = const()[name = tensor("op_1669_transpose_x_0"), val = tensor(false)]; tensor var_1669_transpose_y_0 = const()[name = tensor("op_1669_transpose_y_0"), val = tensor(false)]; tensor transpose_112_perm_0 = const()[name = tensor("transpose_112_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_113_perm_0 = const()[name = tensor("transpose_113_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = sa_k_out_9_cast_fp16)[name = tensor("transpose_222")]; tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = q_9_cast_fp16)[name = tensor("transpose_223")]; tensor var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_0, transpose_y = var_1669_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor("op_1669_cast_fp16")]; tensor var_1670_to_fp16 = const()[name = tensor("op_1670_to_fp16"), val = tensor(0x1p-3)]; tensor scores_17_cast_fp16 = mul(x = var_1669_cast_fp16, y = var_1670_to_fp16)[name = tensor("scores_17_cast_fp16")]; tensor var_1688_to_fp16 = const()[name = tensor("op_1688_to_fp16"), val = tensor(-inf)]; tensor scores_19_cast_fp16 = select(a = var_1688_to_fp16, b = scores_17_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_19_cast_fp16")]; tensor var_1690 = const()[name = tensor("op_1690"), val = tensor(-1)]; tensor probs_9_cast_fp16 = softmax(axis = var_1690, x = scores_19_cast_fp16)[name = tensor("probs_9_cast_fp16")]; tensor var_1693_transpose_x_0 = const()[name = tensor("op_1693_transpose_x_0"), val = tensor(false)]; tensor var_1693_transpose_y_0 = const()[name = tensor("op_1693_transpose_y_0"), val = tensor(false)]; tensor v_t_9_cast_fp16 = transpose(perm = var_1667, x = sa_v_out_9_cast_fp16)[name = tensor("transpose_221")]; tensor var_1693_cast_fp16 = matmul(transpose_x = var_1693_transpose_x_0, transpose_y = var_1693_transpose_y_0, x = probs_9_cast_fp16, y = v_t_9_cast_fp16)[name = tensor("op_1693_cast_fp16")]; tensor var_1698 = const()[name = tensor("op_1698"), val = tensor([0, 2, 1, 3])]; tensor var_1703 = const()[name = tensor("op_1703"), val = tensor([1, 1, -1])]; tensor var_1699_cast_fp16 = transpose(perm = var_1698, x = var_1693_cast_fp16)[name = tensor("transpose_220")]; tensor input_63_cast_fp16 = reshape(shape = var_1703, x = var_1699_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32552192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33142080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor input_65_cast_fp16 = add(x = input_59_cast_fp16, y = linear_17_cast_fp16)[name = tensor("input_65_cast_fp16")]; tensor input_67_axes_0 = const()[name = tensor("input_67_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33143680)))]; tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, epsilon = var_1711_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33145280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33243648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_1724 = const()[name = tensor("op_1724"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_9_cast_fp16 = reshape(shape = var_1724, x = linear_18_cast_fp16)[name = tensor("xq_proj_9_cast_fp16")]; tensor var_1742 = const()[name = tensor("op_1742"), val = tensor([0, 2, -3, -1])]; tensor xa_v_4_to_fp16_dtype_0 = const()[name = tensor("xa_v_4_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_1744_transpose_x_0 = const()[name = tensor("op_1744_transpose_x_0"), val = tensor(false)]; tensor var_1744_transpose_y_0 = const()[name = tensor("op_1744_transpose_y_0"), val = tensor(false)]; tensor xa_k_4_to_fp16_dtype_0 = const()[name = tensor("xa_k_4_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_114_perm_0 = const()[name = tensor("transpose_114_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_115_perm_0 = const()[name = tensor("transpose_115_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_4_to_fp16 = cast(dtype = xa_k_4_to_fp16_dtype_0, x = xa_k_4)[name = tensor("cast_56")]; tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = xa_k_4_to_fp16)[name = tensor("transpose_218")]; tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = xq_proj_9_cast_fp16)[name = tensor("transpose_219")]; tensor var_1744_cast_fp16 = matmul(transpose_x = var_1744_transpose_x_0, transpose_y = var_1744_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor("op_1744_cast_fp16")]; tensor var_1745_to_fp16 = const()[name = tensor("op_1745_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_17_cast_fp16 = mul(x = var_1744_cast_fp16, y = var_1745_to_fp16)[name = tensor("xscores_17_cast_fp16")]; tensor var_1763_to_fp16 = const()[name = tensor("op_1763_to_fp16"), val = tensor(-inf)]; tensor xscores_19_cast_fp16 = select(a = var_1763_to_fp16, b = xscores_17_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_19_cast_fp16")]; tensor var_1765 = const()[name = tensor("op_1765"), val = tensor(-1)]; tensor xprobs_9_cast_fp16 = softmax(axis = var_1765, x = xscores_19_cast_fp16)[name = tensor("xprobs_9_cast_fp16")]; tensor var_1768_transpose_x_0 = const()[name = tensor("op_1768_transpose_x_0"), val = tensor(false)]; tensor var_1768_transpose_y_0 = const()[name = tensor("op_1768_transpose_y_0"), val = tensor(false)]; tensor xa_v_4_to_fp16 = cast(dtype = xa_v_4_to_fp16_dtype_0, x = xa_v_4)[name = tensor("cast_55")]; tensor xvT_9_cast_fp16 = transpose(perm = var_1742, x = xa_v_4_to_fp16)[name = tensor("transpose_217")]; tensor var_1768_cast_fp16 = matmul(transpose_x = var_1768_transpose_x_0, transpose_y = var_1768_transpose_y_0, x = xprobs_9_cast_fp16, y = xvT_9_cast_fp16)[name = tensor("op_1768_cast_fp16")]; tensor var_1773 = const()[name = tensor("op_1773"), val = tensor([0, 2, 1, 3])]; tensor var_1778 = const()[name = tensor("op_1778"), val = tensor([1, 1, -1])]; tensor var_1774_cast_fp16 = transpose(perm = var_1773, x = var_1768_cast_fp16)[name = tensor("transpose_216")]; tensor input_69_cast_fp16 = reshape(shape = var_1778, x = var_1774_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33243968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33342336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_71_cast_fp16 = add(x = input_65_cast_fp16, y = linear_19_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor x_33_axes_0 = const()[name = tensor("x_33_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33343936)))]; tensor var_1786_to_fp16 = const()[name = tensor("op_1786_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_1786_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("x_33_cast_fp16")]; tensor var_1802 = const()[name = tensor("op_1802"), val = tensor([0, 2, 1])]; tensor y_17_pad_type_0 = const()[name = tensor("y_17_pad_type_0"), val = tensor("valid")]; tensor y_17_strides_0 = const()[name = tensor("y_17_strides_0"), val = tensor([1])]; tensor y_17_pad_0 = const()[name = tensor("y_17_pad_0"), val = tensor([0, 0])]; tensor y_17_dilations_0 = const()[name = tensor("y_17_dilations_0"), val = tensor([1])]; tensor y_17_groups_0 = const()[name = tensor("y_17_groups_0"), val = tensor(1)]; tensor dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33345536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35704896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_35_cast_fp16 = transpose(perm = var_1802, x = x_33_cast_fp16)[name = tensor("transpose_215")]; tensor y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_35_cast_fp16)[name = tensor("y_17_cast_fp16")]; tensor x_37_mode_0 = const()[name = tensor("x_37_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = y_17_cast_fp16)[name = tensor("x_37_cast_fp16")]; tensor y_19_pad_type_0 = const()[name = tensor("y_19_pad_type_0"), val = tensor("valid")]; tensor y_19_strides_0 = const()[name = tensor("y_19_strides_0"), val = tensor([1])]; tensor y_19_pad_0 = const()[name = tensor("y_19_pad_0"), val = tensor([0, 0])]; tensor y_19_dilations_0 = const()[name = tensor("y_19_dilations_0"), val = tensor([1])]; tensor y_19_groups_0 = const()[name = tensor("y_19_groups_0"), val = tensor(1)]; tensor dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35711104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38070464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_19_cast_fp16 = conv(dilations = y_19_dilations_0, groups = y_19_groups_0, pad = y_19_pad_0, pad_type = y_19_pad_type_0, strides = y_19_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = tensor("y_19_cast_fp16")]; tensor var_1820 = const()[name = tensor("op_1820"), val = tensor([0, 2, 1])]; tensor var_1821_cast_fp16 = transpose(perm = var_1820, x = y_19_cast_fp16)[name = tensor("transpose_214")]; tensor input_73_cast_fp16 = add(x = input_71_cast_fp16, y = var_1821_cast_fp16)[name = tensor("input_73_cast_fp16")]; tensor input_75_axes_0 = const()[name = tensor("input_75_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38072064)))]; tensor var_1825_to_fp16 = const()[name = tensor("op_1825_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_75_cast_fp16 = layer_norm(axes = input_75_axes_0, epsilon = var_1825_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38073664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39843200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_1839 = const()[name = tensor("op_1839"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_23_cast_fp16 = reshape(shape = var_1839, x = linear_20_cast_fp16)[name = tensor("qkv_23_cast_fp16")]; tensor q_11_begin_0 = const()[name = tensor("q_11_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_11_end_0 = const()[name = tensor("q_11_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_11_end_mask_0 = const()[name = tensor("q_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_11_squeeze_mask_0 = const()[name = tensor("q_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_11_cast_fp16 = slice_by_index(begin = q_11_begin_0, end = q_11_end_0, end_mask = q_11_end_mask_0, squeeze_mask = q_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("q_11_cast_fp16")]; tensor new_k_11_begin_0 = const()[name = tensor("new_k_11_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_11_end_0 = const()[name = tensor("new_k_11_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_11_end_mask_0 = const()[name = tensor("new_k_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_11_squeeze_mask_0 = const()[name = tensor("new_k_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("new_k_11_cast_fp16")]; tensor new_v_11_begin_0 = const()[name = tensor("new_v_11_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_11_end_0 = const()[name = tensor("new_v_11_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_11_end_mask_0 = const()[name = tensor("new_v_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_11_squeeze_mask_0 = const()[name = tensor("new_v_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("new_v_11_cast_fp16")]; tensor sa_k_in_5_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_5_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_5_to_fp16 = cast(dtype = sa_k_in_5_to_fp16_dtype_0, x = sa_k_in_5)[name = tensor("cast_54")]; tensor var_1900_cast_fp16 = mul(x = sa_k_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1900_cast_fp16")]; tensor var_1901_cast_fp16 = mul(x = new_k_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1901_cast_fp16")]; tensor sa_k_out_11_cast_fp16 = add(x = var_1900_cast_fp16, y = var_1901_cast_fp16)[name = tensor("sa_k_out_11_cast_fp16")]; tensor sa_k_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_5_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_5_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_5_to_fp16 = cast(dtype = sa_v_in_5_to_fp16_dtype_0, x = sa_v_in_5)[name = tensor("cast_53")]; tensor var_1907_cast_fp16 = mul(x = sa_v_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor("op_1907_cast_fp16")]; tensor var_1908_cast_fp16 = mul(x = new_v_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_1908_cast_fp16")]; tensor sa_v_out_11_cast_fp16 = add(x = var_1907_cast_fp16, y = var_1908_cast_fp16)[name = tensor("sa_v_out_11_cast_fp16")]; tensor sa_v_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1927 = const()[name = tensor("op_1927"), val = tensor([0, 2, -3, -1])]; tensor var_1929_transpose_x_0 = const()[name = tensor("op_1929_transpose_x_0"), val = tensor(false)]; tensor var_1929_transpose_y_0 = const()[name = tensor("op_1929_transpose_y_0"), val = tensor(false)]; tensor transpose_116_perm_0 = const()[name = tensor("transpose_116_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_117_perm_0 = const()[name = tensor("transpose_117_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = sa_k_out_11_cast_fp16)[name = tensor("transpose_212")]; tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = q_11_cast_fp16)[name = tensor("transpose_213")]; tensor var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor("op_1929_cast_fp16")]; tensor var_1930_to_fp16 = const()[name = tensor("op_1930_to_fp16"), val = tensor(0x1p-3)]; tensor scores_21_cast_fp16 = mul(x = var_1929_cast_fp16, y = var_1930_to_fp16)[name = tensor("scores_21_cast_fp16")]; tensor var_1948_to_fp16 = const()[name = tensor("op_1948_to_fp16"), val = tensor(-inf)]; tensor scores_23_cast_fp16 = select(a = var_1948_to_fp16, b = scores_21_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_23_cast_fp16")]; tensor var_1950 = const()[name = tensor("op_1950"), val = tensor(-1)]; tensor probs_11_cast_fp16 = softmax(axis = var_1950, x = scores_23_cast_fp16)[name = tensor("probs_11_cast_fp16")]; tensor var_1953_transpose_x_0 = const()[name = tensor("op_1953_transpose_x_0"), val = tensor(false)]; tensor var_1953_transpose_y_0 = const()[name = tensor("op_1953_transpose_y_0"), val = tensor(false)]; tensor v_t_11_cast_fp16 = transpose(perm = var_1927, x = sa_v_out_11_cast_fp16)[name = tensor("transpose_211")]; tensor var_1953_cast_fp16 = matmul(transpose_x = var_1953_transpose_x_0, transpose_y = var_1953_transpose_y_0, x = probs_11_cast_fp16, y = v_t_11_cast_fp16)[name = tensor("op_1953_cast_fp16")]; tensor var_1958 = const()[name = tensor("op_1958"), val = tensor([0, 2, 1, 3])]; tensor var_1963 = const()[name = tensor("op_1963"), val = tensor([1, 1, -1])]; tensor var_1959_cast_fp16 = transpose(perm = var_1958, x = var_1953_cast_fp16)[name = tensor("transpose_210")]; tensor input_77_cast_fp16 = reshape(shape = var_1963, x = var_1959_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39847872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40437760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor input_79_cast_fp16 = add(x = input_73_cast_fp16, y = linear_21_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor input_81_axes_0 = const()[name = tensor("input_81_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40439360)))]; tensor var_1971_to_fp16 = const()[name = tensor("op_1971_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_1971_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("input_81_cast_fp16")]; tensor dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40440960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40539328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_1984 = const()[name = tensor("op_1984"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_11_cast_fp16 = reshape(shape = var_1984, x = linear_22_cast_fp16)[name = tensor("xq_proj_11_cast_fp16")]; tensor var_2002 = const()[name = tensor("op_2002"), val = tensor([0, 2, -3, -1])]; tensor xa_v_5_to_fp16_dtype_0 = const()[name = tensor("xa_v_5_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_2004_transpose_x_0 = const()[name = tensor("op_2004_transpose_x_0"), val = tensor(false)]; tensor var_2004_transpose_y_0 = const()[name = tensor("op_2004_transpose_y_0"), val = tensor(false)]; tensor xa_k_5_to_fp16_dtype_0 = const()[name = tensor("xa_k_5_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_118_perm_0 = const()[name = tensor("transpose_118_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_119_perm_0 = const()[name = tensor("transpose_119_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_5_to_fp16 = cast(dtype = xa_k_5_to_fp16_dtype_0, x = xa_k_5)[name = tensor("cast_52")]; tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = xa_k_5_to_fp16)[name = tensor("transpose_208")]; tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = xq_proj_11_cast_fp16)[name = tensor("transpose_209")]; tensor var_2004_cast_fp16 = matmul(transpose_x = var_2004_transpose_x_0, transpose_y = var_2004_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor("op_2004_cast_fp16")]; tensor var_2005_to_fp16 = const()[name = tensor("op_2005_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_21_cast_fp16 = mul(x = var_2004_cast_fp16, y = var_2005_to_fp16)[name = tensor("xscores_21_cast_fp16")]; tensor var_2023_to_fp16 = const()[name = tensor("op_2023_to_fp16"), val = tensor(-inf)]; tensor xscores_23_cast_fp16 = select(a = var_2023_to_fp16, b = xscores_21_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_23_cast_fp16")]; tensor var_2025 = const()[name = tensor("op_2025"), val = tensor(-1)]; tensor xprobs_11_cast_fp16 = softmax(axis = var_2025, x = xscores_23_cast_fp16)[name = tensor("xprobs_11_cast_fp16")]; tensor var_2028_transpose_x_0 = const()[name = tensor("op_2028_transpose_x_0"), val = tensor(false)]; tensor var_2028_transpose_y_0 = const()[name = tensor("op_2028_transpose_y_0"), val = tensor(false)]; tensor xa_v_5_to_fp16 = cast(dtype = xa_v_5_to_fp16_dtype_0, x = xa_v_5)[name = tensor("cast_51")]; tensor xvT_11_cast_fp16 = transpose(perm = var_2002, x = xa_v_5_to_fp16)[name = tensor("transpose_207")]; tensor var_2028_cast_fp16 = matmul(transpose_x = var_2028_transpose_x_0, transpose_y = var_2028_transpose_y_0, x = xprobs_11_cast_fp16, y = xvT_11_cast_fp16)[name = tensor("op_2028_cast_fp16")]; tensor var_2033 = const()[name = tensor("op_2033"), val = tensor([0, 2, 1, 3])]; tensor var_2038 = const()[name = tensor("op_2038"), val = tensor([1, 1, -1])]; tensor var_2034_cast_fp16 = transpose(perm = var_2033, x = var_2028_cast_fp16)[name = tensor("transpose_206")]; tensor input_83_cast_fp16 = reshape(shape = var_2038, x = var_2034_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40539648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40638016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor input_85_cast_fp16 = add(x = input_79_cast_fp16, y = linear_23_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor x_41_axes_0 = const()[name = tensor("x_41_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40639616)))]; tensor var_2046_to_fp16 = const()[name = tensor("op_2046_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_2046_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("x_41_cast_fp16")]; tensor var_2062 = const()[name = tensor("op_2062"), val = tensor([0, 2, 1])]; tensor y_21_pad_type_0 = const()[name = tensor("y_21_pad_type_0"), val = tensor("valid")]; tensor y_21_strides_0 = const()[name = tensor("y_21_strides_0"), val = tensor([1])]; tensor y_21_pad_0 = const()[name = tensor("y_21_pad_0"), val = tensor([0, 0])]; tensor y_21_dilations_0 = const()[name = tensor("y_21_dilations_0"), val = tensor([1])]; tensor y_21_groups_0 = const()[name = tensor("y_21_groups_0"), val = tensor(1)]; tensor dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40641216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43000576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_43_cast_fp16 = transpose(perm = var_2062, x = x_41_cast_fp16)[name = tensor("transpose_205")]; tensor y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = tensor("y_21_cast_fp16")]; tensor x_45_mode_0 = const()[name = tensor("x_45_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_21_cast_fp16)[name = tensor("x_45_cast_fp16")]; tensor y_23_pad_type_0 = const()[name = tensor("y_23_pad_type_0"), val = tensor("valid")]; tensor y_23_strides_0 = const()[name = tensor("y_23_strides_0"), val = tensor([1])]; tensor y_23_pad_0 = const()[name = tensor("y_23_pad_0"), val = tensor([0, 0])]; tensor y_23_dilations_0 = const()[name = tensor("y_23_dilations_0"), val = tensor([1])]; tensor y_23_groups_0 = const()[name = tensor("y_23_groups_0"), val = tensor(1)]; tensor dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43006784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45366144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = tensor("y_23_cast_fp16")]; tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([0, 2, 1])]; tensor var_2081_cast_fp16 = transpose(perm = var_2080, x = y_23_cast_fp16)[name = tensor("transpose_204")]; tensor input_87_cast_fp16 = add(x = input_85_cast_fp16, y = var_2081_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor input_89_axes_0 = const()[name = tensor("input_89_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45367744)))]; tensor var_2085_to_fp16 = const()[name = tensor("op_2085_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_89_cast_fp16 = layer_norm(axes = input_89_axes_0, epsilon = var_2085_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; tensor dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45369344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47138880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = tensor("linear_24_cast_fp16")]; tensor var_2099 = const()[name = tensor("op_2099"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_27_cast_fp16 = reshape(shape = var_2099, x = linear_24_cast_fp16)[name = tensor("qkv_27_cast_fp16")]; tensor q_13_begin_0 = const()[name = tensor("q_13_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_13_end_0 = const()[name = tensor("q_13_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_13_end_mask_0 = const()[name = tensor("q_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_13_squeeze_mask_0 = const()[name = tensor("q_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("q_13_cast_fp16")]; tensor new_k_13_begin_0 = const()[name = tensor("new_k_13_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_13_end_0 = const()[name = tensor("new_k_13_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_13_end_mask_0 = const()[name = tensor("new_k_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_13_squeeze_mask_0 = const()[name = tensor("new_k_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("new_k_13_cast_fp16")]; tensor new_v_13_begin_0 = const()[name = tensor("new_v_13_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_13_end_0 = const()[name = tensor("new_v_13_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_13_end_mask_0 = const()[name = tensor("new_v_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_13_squeeze_mask_0 = const()[name = tensor("new_v_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("new_v_13_cast_fp16")]; tensor sa_k_in_6_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_6_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_6_to_fp16 = cast(dtype = sa_k_in_6_to_fp16_dtype_0, x = sa_k_in_6)[name = tensor("cast_50")]; tensor var_2160_cast_fp16 = mul(x = sa_k_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2160_cast_fp16")]; tensor var_2161_cast_fp16 = mul(x = new_k_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2161_cast_fp16")]; tensor sa_k_out_13_cast_fp16 = add(x = var_2160_cast_fp16, y = var_2161_cast_fp16)[name = tensor("sa_k_out_13_cast_fp16")]; tensor sa_k_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_6_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_6_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_6_to_fp16 = cast(dtype = sa_v_in_6_to_fp16_dtype_0, x = sa_v_in_6)[name = tensor("cast_49")]; tensor var_2167_cast_fp16 = mul(x = sa_v_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2167_cast_fp16")]; tensor var_2168_cast_fp16 = mul(x = new_v_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2168_cast_fp16")]; tensor sa_v_out_13_cast_fp16 = add(x = var_2167_cast_fp16, y = var_2168_cast_fp16)[name = tensor("sa_v_out_13_cast_fp16")]; tensor sa_v_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2187 = const()[name = tensor("op_2187"), val = tensor([0, 2, -3, -1])]; tensor var_2189_transpose_x_0 = const()[name = tensor("op_2189_transpose_x_0"), val = tensor(false)]; tensor var_2189_transpose_y_0 = const()[name = tensor("op_2189_transpose_y_0"), val = tensor(false)]; tensor transpose_120_perm_0 = const()[name = tensor("transpose_120_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_121_perm_0 = const()[name = tensor("transpose_121_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = sa_k_out_13_cast_fp16)[name = tensor("transpose_202")]; tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = q_13_cast_fp16)[name = tensor("transpose_203")]; tensor var_2189_cast_fp16 = matmul(transpose_x = var_2189_transpose_x_0, transpose_y = var_2189_transpose_y_0, x = transpose_120, y = transpose_121)[name = tensor("op_2189_cast_fp16")]; tensor var_2190_to_fp16 = const()[name = tensor("op_2190_to_fp16"), val = tensor(0x1p-3)]; tensor scores_25_cast_fp16 = mul(x = var_2189_cast_fp16, y = var_2190_to_fp16)[name = tensor("scores_25_cast_fp16")]; tensor var_2208_to_fp16 = const()[name = tensor("op_2208_to_fp16"), val = tensor(-inf)]; tensor scores_27_cast_fp16 = select(a = var_2208_to_fp16, b = scores_25_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_27_cast_fp16")]; tensor var_2210 = const()[name = tensor("op_2210"), val = tensor(-1)]; tensor probs_13_cast_fp16 = softmax(axis = var_2210, x = scores_27_cast_fp16)[name = tensor("probs_13_cast_fp16")]; tensor var_2213_transpose_x_0 = const()[name = tensor("op_2213_transpose_x_0"), val = tensor(false)]; tensor var_2213_transpose_y_0 = const()[name = tensor("op_2213_transpose_y_0"), val = tensor(false)]; tensor v_t_13_cast_fp16 = transpose(perm = var_2187, x = sa_v_out_13_cast_fp16)[name = tensor("transpose_201")]; tensor var_2213_cast_fp16 = matmul(transpose_x = var_2213_transpose_x_0, transpose_y = var_2213_transpose_y_0, x = probs_13_cast_fp16, y = v_t_13_cast_fp16)[name = tensor("op_2213_cast_fp16")]; tensor var_2218 = const()[name = tensor("op_2218"), val = tensor([0, 2, 1, 3])]; tensor var_2223 = const()[name = tensor("op_2223"), val = tensor([1, 1, -1])]; tensor var_2219_cast_fp16 = transpose(perm = var_2218, x = var_2213_cast_fp16)[name = tensor("transpose_200")]; tensor input_91_cast_fp16 = reshape(shape = var_2223, x = var_2219_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47143552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47733440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor input_93_cast_fp16 = add(x = input_87_cast_fp16, y = linear_25_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor input_95_axes_0 = const()[name = tensor("input_95_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47735040)))]; tensor var_2231_to_fp16 = const()[name = tensor("op_2231_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, epsilon = var_2231_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47736640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47835008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_26_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor var_2244 = const()[name = tensor("op_2244"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_13_cast_fp16 = reshape(shape = var_2244, x = linear_26_cast_fp16)[name = tensor("xq_proj_13_cast_fp16")]; tensor var_2262 = const()[name = tensor("op_2262"), val = tensor([0, 2, -3, -1])]; tensor xa_v_6_to_fp16_dtype_0 = const()[name = tensor("xa_v_6_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_2264_transpose_x_0 = const()[name = tensor("op_2264_transpose_x_0"), val = tensor(false)]; tensor var_2264_transpose_y_0 = const()[name = tensor("op_2264_transpose_y_0"), val = tensor(false)]; tensor xa_k_6_to_fp16_dtype_0 = const()[name = tensor("xa_k_6_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_122_perm_0 = const()[name = tensor("transpose_122_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_123_perm_0 = const()[name = tensor("transpose_123_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_6_to_fp16 = cast(dtype = xa_k_6_to_fp16_dtype_0, x = xa_k_6)[name = tensor("cast_48")]; tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = xa_k_6_to_fp16)[name = tensor("transpose_198")]; tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = xq_proj_13_cast_fp16)[name = tensor("transpose_199")]; tensor var_2264_cast_fp16 = matmul(transpose_x = var_2264_transpose_x_0, transpose_y = var_2264_transpose_y_0, x = transpose_122, y = transpose_123)[name = tensor("op_2264_cast_fp16")]; tensor var_2265_to_fp16 = const()[name = tensor("op_2265_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_25_cast_fp16 = mul(x = var_2264_cast_fp16, y = var_2265_to_fp16)[name = tensor("xscores_25_cast_fp16")]; tensor var_2283_to_fp16 = const()[name = tensor("op_2283_to_fp16"), val = tensor(-inf)]; tensor xscores_27_cast_fp16 = select(a = var_2283_to_fp16, b = xscores_25_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_27_cast_fp16")]; tensor var_2285 = const()[name = tensor("op_2285"), val = tensor(-1)]; tensor xprobs_13_cast_fp16 = softmax(axis = var_2285, x = xscores_27_cast_fp16)[name = tensor("xprobs_13_cast_fp16")]; tensor var_2288_transpose_x_0 = const()[name = tensor("op_2288_transpose_x_0"), val = tensor(false)]; tensor var_2288_transpose_y_0 = const()[name = tensor("op_2288_transpose_y_0"), val = tensor(false)]; tensor xa_v_6_to_fp16 = cast(dtype = xa_v_6_to_fp16_dtype_0, x = xa_v_6)[name = tensor("cast_47")]; tensor xvT_13_cast_fp16 = transpose(perm = var_2262, x = xa_v_6_to_fp16)[name = tensor("transpose_197")]; tensor var_2288_cast_fp16 = matmul(transpose_x = var_2288_transpose_x_0, transpose_y = var_2288_transpose_y_0, x = xprobs_13_cast_fp16, y = xvT_13_cast_fp16)[name = tensor("op_2288_cast_fp16")]; tensor var_2293 = const()[name = tensor("op_2293"), val = tensor([0, 2, 1, 3])]; tensor var_2298 = const()[name = tensor("op_2298"), val = tensor([1, 1, -1])]; tensor var_2294_cast_fp16 = transpose(perm = var_2293, x = var_2288_cast_fp16)[name = tensor("transpose_196")]; tensor input_97_cast_fp16 = reshape(shape = var_2298, x = var_2294_cast_fp16)[name = tensor("input_97_cast_fp16")]; tensor dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47835328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47933696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor input_99_cast_fp16 = add(x = input_93_cast_fp16, y = linear_27_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor x_49_axes_0 = const()[name = tensor("x_49_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47935296)))]; tensor var_2306_to_fp16 = const()[name = tensor("op_2306_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_49_cast_fp16 = layer_norm(axes = x_49_axes_0, epsilon = var_2306_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("x_49_cast_fp16")]; tensor var_2322 = const()[name = tensor("op_2322"), val = tensor([0, 2, 1])]; tensor y_25_pad_type_0 = const()[name = tensor("y_25_pad_type_0"), val = tensor("valid")]; tensor y_25_strides_0 = const()[name = tensor("y_25_strides_0"), val = tensor([1])]; tensor y_25_pad_0 = const()[name = tensor("y_25_pad_0"), val = tensor([0, 0])]; tensor y_25_dilations_0 = const()[name = tensor("y_25_dilations_0"), val = tensor([1])]; tensor y_25_groups_0 = const()[name = tensor("y_25_groups_0"), val = tensor(1)]; tensor dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47936896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50296256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_51_cast_fp16 = transpose(perm = var_2322, x = x_49_cast_fp16)[name = tensor("transpose_195")]; tensor y_25_cast_fp16 = conv(dilations = y_25_dilations_0, groups = y_25_groups_0, pad = y_25_pad_0, pad_type = y_25_pad_type_0, strides = y_25_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_51_cast_fp16)[name = tensor("y_25_cast_fp16")]; tensor x_53_mode_0 = const()[name = tensor("x_53_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = y_25_cast_fp16)[name = tensor("x_53_cast_fp16")]; tensor y_27_pad_type_0 = const()[name = tensor("y_27_pad_type_0"), val = tensor("valid")]; tensor y_27_strides_0 = const()[name = tensor("y_27_strides_0"), val = tensor([1])]; tensor y_27_pad_0 = const()[name = tensor("y_27_pad_0"), val = tensor([0, 0])]; tensor y_27_dilations_0 = const()[name = tensor("y_27_dilations_0"), val = tensor([1])]; tensor y_27_groups_0 = const()[name = tensor("y_27_groups_0"), val = tensor(1)]; tensor dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50302464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52661824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor("y_27_cast_fp16")]; tensor var_2340 = const()[name = tensor("op_2340"), val = tensor([0, 2, 1])]; tensor var_2341_cast_fp16 = transpose(perm = var_2340, x = y_27_cast_fp16)[name = tensor("transpose_194")]; tensor input_101_cast_fp16 = add(x = input_99_cast_fp16, y = var_2341_cast_fp16)[name = tensor("input_101_cast_fp16")]; tensor input_103_axes_0 = const()[name = tensor("input_103_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52663424)))]; tensor var_2345_to_fp16 = const()[name = tensor("op_2345_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, epsilon = var_2345_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52665024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54434560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor var_2359 = const()[name = tensor("op_2359"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_31_cast_fp16 = reshape(shape = var_2359, x = linear_28_cast_fp16)[name = tensor("qkv_31_cast_fp16")]; tensor q_15_begin_0 = const()[name = tensor("q_15_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_15_end_0 = const()[name = tensor("q_15_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_15_end_mask_0 = const()[name = tensor("q_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_15_squeeze_mask_0 = const()[name = tensor("q_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_15_cast_fp16 = slice_by_index(begin = q_15_begin_0, end = q_15_end_0, end_mask = q_15_end_mask_0, squeeze_mask = q_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("q_15_cast_fp16")]; tensor new_k_15_begin_0 = const()[name = tensor("new_k_15_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_15_end_0 = const()[name = tensor("new_k_15_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_15_end_mask_0 = const()[name = tensor("new_k_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_15_squeeze_mask_0 = const()[name = tensor("new_k_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("new_k_15_cast_fp16")]; tensor new_v_15_begin_0 = const()[name = tensor("new_v_15_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_15_end_0 = const()[name = tensor("new_v_15_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_15_end_mask_0 = const()[name = tensor("new_v_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_15_squeeze_mask_0 = const()[name = tensor("new_v_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("new_v_15_cast_fp16")]; tensor sa_k_in_7_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_7_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_7_to_fp16 = cast(dtype = sa_k_in_7_to_fp16_dtype_0, x = sa_k_in_7)[name = tensor("cast_46")]; tensor var_2420_cast_fp16 = mul(x = sa_k_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2420_cast_fp16")]; tensor var_2421_cast_fp16 = mul(x = new_k_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2421_cast_fp16")]; tensor sa_k_out_15_cast_fp16 = add(x = var_2420_cast_fp16, y = var_2421_cast_fp16)[name = tensor("sa_k_out_15_cast_fp16")]; tensor sa_k_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_7_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_7_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_7_to_fp16 = cast(dtype = sa_v_in_7_to_fp16_dtype_0, x = sa_v_in_7)[name = tensor("cast_45")]; tensor var_2427_cast_fp16 = mul(x = sa_v_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2427_cast_fp16")]; tensor var_2428_cast_fp16 = mul(x = new_v_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2428_cast_fp16")]; tensor sa_v_out_15_cast_fp16 = add(x = var_2427_cast_fp16, y = var_2428_cast_fp16)[name = tensor("sa_v_out_15_cast_fp16")]; tensor sa_v_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2447 = const()[name = tensor("op_2447"), val = tensor([0, 2, -3, -1])]; tensor var_2449_transpose_x_0 = const()[name = tensor("op_2449_transpose_x_0"), val = tensor(false)]; tensor var_2449_transpose_y_0 = const()[name = tensor("op_2449_transpose_y_0"), val = tensor(false)]; tensor transpose_124_perm_0 = const()[name = tensor("transpose_124_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_125_perm_0 = const()[name = tensor("transpose_125_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = sa_k_out_15_cast_fp16)[name = tensor("transpose_192")]; tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = q_15_cast_fp16)[name = tensor("transpose_193")]; tensor var_2449_cast_fp16 = matmul(transpose_x = var_2449_transpose_x_0, transpose_y = var_2449_transpose_y_0, x = transpose_124, y = transpose_125)[name = tensor("op_2449_cast_fp16")]; tensor var_2450_to_fp16 = const()[name = tensor("op_2450_to_fp16"), val = tensor(0x1p-3)]; tensor scores_29_cast_fp16 = mul(x = var_2449_cast_fp16, y = var_2450_to_fp16)[name = tensor("scores_29_cast_fp16")]; tensor var_2468_to_fp16 = const()[name = tensor("op_2468_to_fp16"), val = tensor(-inf)]; tensor scores_31_cast_fp16 = select(a = var_2468_to_fp16, b = scores_29_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_31_cast_fp16")]; tensor var_2470 = const()[name = tensor("op_2470"), val = tensor(-1)]; tensor probs_15_cast_fp16 = softmax(axis = var_2470, x = scores_31_cast_fp16)[name = tensor("probs_15_cast_fp16")]; tensor var_2473_transpose_x_0 = const()[name = tensor("op_2473_transpose_x_0"), val = tensor(false)]; tensor var_2473_transpose_y_0 = const()[name = tensor("op_2473_transpose_y_0"), val = tensor(false)]; tensor v_t_15_cast_fp16 = transpose(perm = var_2447, x = sa_v_out_15_cast_fp16)[name = tensor("transpose_191")]; tensor var_2473_cast_fp16 = matmul(transpose_x = var_2473_transpose_x_0, transpose_y = var_2473_transpose_y_0, x = probs_15_cast_fp16, y = v_t_15_cast_fp16)[name = tensor("op_2473_cast_fp16")]; tensor var_2478 = const()[name = tensor("op_2478"), val = tensor([0, 2, 1, 3])]; tensor var_2483 = const()[name = tensor("op_2483"), val = tensor([1, 1, -1])]; tensor var_2479_cast_fp16 = transpose(perm = var_2478, x = var_2473_cast_fp16)[name = tensor("transpose_190")]; tensor input_105_cast_fp16 = reshape(shape = var_2483, x = var_2479_cast_fp16)[name = tensor("input_105_cast_fp16")]; tensor dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54439232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55029120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor input_107_cast_fp16 = add(x = input_101_cast_fp16, y = linear_29_cast_fp16)[name = tensor("input_107_cast_fp16")]; tensor input_109_axes_0 = const()[name = tensor("input_109_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55030720)))]; tensor var_2491_to_fp16 = const()[name = tensor("op_2491_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, epsilon = var_2491_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55032320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55130688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_2504 = const()[name = tensor("op_2504"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_15_cast_fp16 = reshape(shape = var_2504, x = linear_30_cast_fp16)[name = tensor("xq_proj_15_cast_fp16")]; tensor var_2522 = const()[name = tensor("op_2522"), val = tensor([0, 2, -3, -1])]; tensor xa_v_7_to_fp16_dtype_0 = const()[name = tensor("xa_v_7_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_2524_transpose_x_0 = const()[name = tensor("op_2524_transpose_x_0"), val = tensor(false)]; tensor var_2524_transpose_y_0 = const()[name = tensor("op_2524_transpose_y_0"), val = tensor(false)]; tensor xa_k_7_to_fp16_dtype_0 = const()[name = tensor("xa_k_7_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_126_perm_0 = const()[name = tensor("transpose_126_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_127_perm_0 = const()[name = tensor("transpose_127_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_7_to_fp16 = cast(dtype = xa_k_7_to_fp16_dtype_0, x = xa_k_7)[name = tensor("cast_44")]; tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = xa_k_7_to_fp16)[name = tensor("transpose_188")]; tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = xq_proj_15_cast_fp16)[name = tensor("transpose_189")]; tensor var_2524_cast_fp16 = matmul(transpose_x = var_2524_transpose_x_0, transpose_y = var_2524_transpose_y_0, x = transpose_126, y = transpose_127)[name = tensor("op_2524_cast_fp16")]; tensor var_2525_to_fp16 = const()[name = tensor("op_2525_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_29_cast_fp16 = mul(x = var_2524_cast_fp16, y = var_2525_to_fp16)[name = tensor("xscores_29_cast_fp16")]; tensor var_2543_to_fp16 = const()[name = tensor("op_2543_to_fp16"), val = tensor(-inf)]; tensor xscores_31_cast_fp16 = select(a = var_2543_to_fp16, b = xscores_29_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_31_cast_fp16")]; tensor var_2545 = const()[name = tensor("op_2545"), val = tensor(-1)]; tensor xprobs_15_cast_fp16 = softmax(axis = var_2545, x = xscores_31_cast_fp16)[name = tensor("xprobs_15_cast_fp16")]; tensor var_2548_transpose_x_0 = const()[name = tensor("op_2548_transpose_x_0"), val = tensor(false)]; tensor var_2548_transpose_y_0 = const()[name = tensor("op_2548_transpose_y_0"), val = tensor(false)]; tensor xa_v_7_to_fp16 = cast(dtype = xa_v_7_to_fp16_dtype_0, x = xa_v_7)[name = tensor("cast_43")]; tensor xvT_15_cast_fp16 = transpose(perm = var_2522, x = xa_v_7_to_fp16)[name = tensor("transpose_187")]; tensor var_2548_cast_fp16 = matmul(transpose_x = var_2548_transpose_x_0, transpose_y = var_2548_transpose_y_0, x = xprobs_15_cast_fp16, y = xvT_15_cast_fp16)[name = tensor("op_2548_cast_fp16")]; tensor var_2553 = const()[name = tensor("op_2553"), val = tensor([0, 2, 1, 3])]; tensor var_2558 = const()[name = tensor("op_2558"), val = tensor([1, 1, -1])]; tensor var_2554_cast_fp16 = transpose(perm = var_2553, x = var_2548_cast_fp16)[name = tensor("transpose_186")]; tensor input_111_cast_fp16 = reshape(shape = var_2558, x = var_2554_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55131008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55229376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor input_113_cast_fp16 = add(x = input_107_cast_fp16, y = linear_31_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor x_57_axes_0 = const()[name = tensor("x_57_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55230976)))]; tensor var_2566_to_fp16 = const()[name = tensor("op_2566_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_57_cast_fp16 = layer_norm(axes = x_57_axes_0, epsilon = var_2566_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("x_57_cast_fp16")]; tensor var_2582 = const()[name = tensor("op_2582"), val = tensor([0, 2, 1])]; tensor y_29_pad_type_0 = const()[name = tensor("y_29_pad_type_0"), val = tensor("valid")]; tensor y_29_strides_0 = const()[name = tensor("y_29_strides_0"), val = tensor([1])]; tensor y_29_pad_0 = const()[name = tensor("y_29_pad_0"), val = tensor([0, 0])]; tensor y_29_dilations_0 = const()[name = tensor("y_29_dilations_0"), val = tensor([1])]; tensor y_29_groups_0 = const()[name = tensor("y_29_groups_0"), val = tensor(1)]; tensor dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55232576))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57591936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_59_cast_fp16 = transpose(perm = var_2582, x = x_57_cast_fp16)[name = tensor("transpose_185")]; tensor y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = tensor("y_29_cast_fp16")]; tensor x_61_mode_0 = const()[name = tensor("x_61_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_29_cast_fp16)[name = tensor("x_61_cast_fp16")]; tensor y_31_pad_type_0 = const()[name = tensor("y_31_pad_type_0"), val = tensor("valid")]; tensor y_31_strides_0 = const()[name = tensor("y_31_strides_0"), val = tensor([1])]; tensor y_31_pad_0 = const()[name = tensor("y_31_pad_0"), val = tensor([0, 0])]; tensor y_31_dilations_0 = const()[name = tensor("y_31_dilations_0"), val = tensor([1])]; tensor y_31_groups_0 = const()[name = tensor("y_31_groups_0"), val = tensor(1)]; tensor dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57598144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59957504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_31_cast_fp16 = conv(dilations = y_31_dilations_0, groups = y_31_groups_0, pad = y_31_pad_0, pad_type = y_31_pad_type_0, strides = y_31_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = tensor("y_31_cast_fp16")]; tensor var_2600 = const()[name = tensor("op_2600"), val = tensor([0, 2, 1])]; tensor var_2601_cast_fp16 = transpose(perm = var_2600, x = y_31_cast_fp16)[name = tensor("transpose_184")]; tensor input_115_cast_fp16 = add(x = input_113_cast_fp16, y = var_2601_cast_fp16)[name = tensor("input_115_cast_fp16")]; tensor input_117_axes_0 = const()[name = tensor("input_117_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59959104)))]; tensor var_2605_to_fp16 = const()[name = tensor("op_2605_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_2605_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59960704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61730240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_2619 = const()[name = tensor("op_2619"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_35_cast_fp16 = reshape(shape = var_2619, x = linear_32_cast_fp16)[name = tensor("qkv_35_cast_fp16")]; tensor q_17_begin_0 = const()[name = tensor("q_17_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_17_end_0 = const()[name = tensor("q_17_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_17_end_mask_0 = const()[name = tensor("q_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_17_squeeze_mask_0 = const()[name = tensor("q_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("q_17_cast_fp16")]; tensor new_k_17_begin_0 = const()[name = tensor("new_k_17_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_17_end_0 = const()[name = tensor("new_k_17_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_17_end_mask_0 = const()[name = tensor("new_k_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_17_squeeze_mask_0 = const()[name = tensor("new_k_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("new_k_17_cast_fp16")]; tensor new_v_17_begin_0 = const()[name = tensor("new_v_17_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_17_end_0 = const()[name = tensor("new_v_17_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_17_end_mask_0 = const()[name = tensor("new_v_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_17_squeeze_mask_0 = const()[name = tensor("new_v_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("new_v_17_cast_fp16")]; tensor sa_k_in_8_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_8_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_8_to_fp16 = cast(dtype = sa_k_in_8_to_fp16_dtype_0, x = sa_k_in_8)[name = tensor("cast_42")]; tensor var_2680_cast_fp16 = mul(x = sa_k_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2680_cast_fp16")]; tensor var_2681_cast_fp16 = mul(x = new_k_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2681_cast_fp16")]; tensor sa_k_out_17_cast_fp16 = add(x = var_2680_cast_fp16, y = var_2681_cast_fp16)[name = tensor("sa_k_out_17_cast_fp16")]; tensor sa_k_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_8_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_8_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_8_to_fp16 = cast(dtype = sa_v_in_8_to_fp16_dtype_0, x = sa_v_in_8)[name = tensor("cast_41")]; tensor var_2687_cast_fp16 = mul(x = sa_v_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2687_cast_fp16")]; tensor var_2688_cast_fp16 = mul(x = new_v_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2688_cast_fp16")]; tensor sa_v_out_17_cast_fp16 = add(x = var_2687_cast_fp16, y = var_2688_cast_fp16)[name = tensor("sa_v_out_17_cast_fp16")]; tensor sa_v_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2707 = const()[name = tensor("op_2707"), val = tensor([0, 2, -3, -1])]; tensor var_2709_transpose_x_0 = const()[name = tensor("op_2709_transpose_x_0"), val = tensor(false)]; tensor var_2709_transpose_y_0 = const()[name = tensor("op_2709_transpose_y_0"), val = tensor(false)]; tensor transpose_128_perm_0 = const()[name = tensor("transpose_128_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_129_perm_0 = const()[name = tensor("transpose_129_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = sa_k_out_17_cast_fp16)[name = tensor("transpose_182")]; tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = q_17_cast_fp16)[name = tensor("transpose_183")]; tensor var_2709_cast_fp16 = matmul(transpose_x = var_2709_transpose_x_0, transpose_y = var_2709_transpose_y_0, x = transpose_128, y = transpose_129)[name = tensor("op_2709_cast_fp16")]; tensor var_2710_to_fp16 = const()[name = tensor("op_2710_to_fp16"), val = tensor(0x1p-3)]; tensor scores_33_cast_fp16 = mul(x = var_2709_cast_fp16, y = var_2710_to_fp16)[name = tensor("scores_33_cast_fp16")]; tensor var_2728_to_fp16 = const()[name = tensor("op_2728_to_fp16"), val = tensor(-inf)]; tensor scores_35_cast_fp16 = select(a = var_2728_to_fp16, b = scores_33_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_35_cast_fp16")]; tensor var_2730 = const()[name = tensor("op_2730"), val = tensor(-1)]; tensor probs_17_cast_fp16 = softmax(axis = var_2730, x = scores_35_cast_fp16)[name = tensor("probs_17_cast_fp16")]; tensor var_2733_transpose_x_0 = const()[name = tensor("op_2733_transpose_x_0"), val = tensor(false)]; tensor var_2733_transpose_y_0 = const()[name = tensor("op_2733_transpose_y_0"), val = tensor(false)]; tensor v_t_17_cast_fp16 = transpose(perm = var_2707, x = sa_v_out_17_cast_fp16)[name = tensor("transpose_181")]; tensor var_2733_cast_fp16 = matmul(transpose_x = var_2733_transpose_x_0, transpose_y = var_2733_transpose_y_0, x = probs_17_cast_fp16, y = v_t_17_cast_fp16)[name = tensor("op_2733_cast_fp16")]; tensor var_2738 = const()[name = tensor("op_2738"), val = tensor([0, 2, 1, 3])]; tensor var_2743 = const()[name = tensor("op_2743"), val = tensor([1, 1, -1])]; tensor var_2739_cast_fp16 = transpose(perm = var_2738, x = var_2733_cast_fp16)[name = tensor("transpose_180")]; tensor input_119_cast_fp16 = reshape(shape = var_2743, x = var_2739_cast_fp16)[name = tensor("input_119_cast_fp16")]; tensor dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61734912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62324800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor("linear_33_cast_fp16")]; tensor input_121_cast_fp16 = add(x = input_115_cast_fp16, y = linear_33_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor input_123_axes_0 = const()[name = tensor("input_123_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62326400)))]; tensor var_2751_to_fp16 = const()[name = tensor("op_2751_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, epsilon = var_2751_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("input_123_cast_fp16")]; tensor dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62328000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62426368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor var_2764 = const()[name = tensor("op_2764"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_17_cast_fp16 = reshape(shape = var_2764, x = linear_34_cast_fp16)[name = tensor("xq_proj_17_cast_fp16")]; tensor var_2782 = const()[name = tensor("op_2782"), val = tensor([0, 2, -3, -1])]; tensor xa_v_8_to_fp16_dtype_0 = const()[name = tensor("xa_v_8_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_2784_transpose_x_0 = const()[name = tensor("op_2784_transpose_x_0"), val = tensor(false)]; tensor var_2784_transpose_y_0 = const()[name = tensor("op_2784_transpose_y_0"), val = tensor(false)]; tensor xa_k_8_to_fp16_dtype_0 = const()[name = tensor("xa_k_8_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_130_perm_0 = const()[name = tensor("transpose_130_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_131_perm_0 = const()[name = tensor("transpose_131_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_8_to_fp16 = cast(dtype = xa_k_8_to_fp16_dtype_0, x = xa_k_8)[name = tensor("cast_40")]; tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = xa_k_8_to_fp16)[name = tensor("transpose_178")]; tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = xq_proj_17_cast_fp16)[name = tensor("transpose_179")]; tensor var_2784_cast_fp16 = matmul(transpose_x = var_2784_transpose_x_0, transpose_y = var_2784_transpose_y_0, x = transpose_130, y = transpose_131)[name = tensor("op_2784_cast_fp16")]; tensor var_2785_to_fp16 = const()[name = tensor("op_2785_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_33_cast_fp16 = mul(x = var_2784_cast_fp16, y = var_2785_to_fp16)[name = tensor("xscores_33_cast_fp16")]; tensor var_2803_to_fp16 = const()[name = tensor("op_2803_to_fp16"), val = tensor(-inf)]; tensor xscores_35_cast_fp16 = select(a = var_2803_to_fp16, b = xscores_33_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_35_cast_fp16")]; tensor var_2805 = const()[name = tensor("op_2805"), val = tensor(-1)]; tensor xprobs_17_cast_fp16 = softmax(axis = var_2805, x = xscores_35_cast_fp16)[name = tensor("xprobs_17_cast_fp16")]; tensor var_2808_transpose_x_0 = const()[name = tensor("op_2808_transpose_x_0"), val = tensor(false)]; tensor var_2808_transpose_y_0 = const()[name = tensor("op_2808_transpose_y_0"), val = tensor(false)]; tensor xa_v_8_to_fp16 = cast(dtype = xa_v_8_to_fp16_dtype_0, x = xa_v_8)[name = tensor("cast_39")]; tensor xvT_17_cast_fp16 = transpose(perm = var_2782, x = xa_v_8_to_fp16)[name = tensor("transpose_177")]; tensor var_2808_cast_fp16 = matmul(transpose_x = var_2808_transpose_x_0, transpose_y = var_2808_transpose_y_0, x = xprobs_17_cast_fp16, y = xvT_17_cast_fp16)[name = tensor("op_2808_cast_fp16")]; tensor var_2813 = const()[name = tensor("op_2813"), val = tensor([0, 2, 1, 3])]; tensor var_2818 = const()[name = tensor("op_2818"), val = tensor([1, 1, -1])]; tensor var_2814_cast_fp16 = transpose(perm = var_2813, x = var_2808_cast_fp16)[name = tensor("transpose_176")]; tensor input_125_cast_fp16 = reshape(shape = var_2818, x = var_2814_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62426688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62525056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor input_127_cast_fp16 = add(x = input_121_cast_fp16, y = linear_35_cast_fp16)[name = tensor("input_127_cast_fp16")]; tensor x_65_axes_0 = const()[name = tensor("x_65_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62526656)))]; tensor var_2826_to_fp16 = const()[name = tensor("op_2826_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_2826_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("x_65_cast_fp16")]; tensor var_2842 = const()[name = tensor("op_2842"), val = tensor([0, 2, 1])]; tensor y_33_pad_type_0 = const()[name = tensor("y_33_pad_type_0"), val = tensor("valid")]; tensor y_33_strides_0 = const()[name = tensor("y_33_strides_0"), val = tensor([1])]; tensor y_33_pad_0 = const()[name = tensor("y_33_pad_0"), val = tensor([0, 0])]; tensor y_33_dilations_0 = const()[name = tensor("y_33_dilations_0"), val = tensor([1])]; tensor y_33_groups_0 = const()[name = tensor("y_33_groups_0"), val = tensor(1)]; tensor dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62528256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64887616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_67_cast_fp16 = transpose(perm = var_2842, x = x_65_cast_fp16)[name = tensor("transpose_175")]; tensor y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = tensor("y_33_cast_fp16")]; tensor x_69_mode_0 = const()[name = tensor("x_69_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_69_cast_fp16 = gelu(mode = x_69_mode_0, x = y_33_cast_fp16)[name = tensor("x_69_cast_fp16")]; tensor y_35_pad_type_0 = const()[name = tensor("y_35_pad_type_0"), val = tensor("valid")]; tensor y_35_strides_0 = const()[name = tensor("y_35_strides_0"), val = tensor([1])]; tensor y_35_pad_0 = const()[name = tensor("y_35_pad_0"), val = tensor([0, 0])]; tensor y_35_dilations_0 = const()[name = tensor("y_35_dilations_0"), val = tensor([1])]; tensor y_35_groups_0 = const()[name = tensor("y_35_groups_0"), val = tensor(1)]; tensor dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64893824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67253184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = tensor("y_35_cast_fp16")]; tensor var_2860 = const()[name = tensor("op_2860"), val = tensor([0, 2, 1])]; tensor var_2861_cast_fp16 = transpose(perm = var_2860, x = y_35_cast_fp16)[name = tensor("transpose_174")]; tensor input_129_cast_fp16 = add(x = input_127_cast_fp16, y = var_2861_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor input_131_axes_0 = const()[name = tensor("input_131_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67254784)))]; tensor var_2865_to_fp16 = const()[name = tensor("op_2865_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, epsilon = var_2865_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67256384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69025920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_2879 = const()[name = tensor("op_2879"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_39_cast_fp16 = reshape(shape = var_2879, x = linear_36_cast_fp16)[name = tensor("qkv_39_cast_fp16")]; tensor q_19_begin_0 = const()[name = tensor("q_19_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_19_end_0 = const()[name = tensor("q_19_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_19_end_mask_0 = const()[name = tensor("q_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_19_squeeze_mask_0 = const()[name = tensor("q_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_19_cast_fp16 = slice_by_index(begin = q_19_begin_0, end = q_19_end_0, end_mask = q_19_end_mask_0, squeeze_mask = q_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("q_19_cast_fp16")]; tensor new_k_19_begin_0 = const()[name = tensor("new_k_19_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_19_end_0 = const()[name = tensor("new_k_19_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_19_end_mask_0 = const()[name = tensor("new_k_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_19_squeeze_mask_0 = const()[name = tensor("new_k_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("new_k_19_cast_fp16")]; tensor new_v_19_begin_0 = const()[name = tensor("new_v_19_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_19_end_0 = const()[name = tensor("new_v_19_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_19_end_mask_0 = const()[name = tensor("new_v_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_19_squeeze_mask_0 = const()[name = tensor("new_v_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("new_v_19_cast_fp16")]; tensor sa_k_in_9_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_9_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_9_to_fp16 = cast(dtype = sa_k_in_9_to_fp16_dtype_0, x = sa_k_in_9)[name = tensor("cast_38")]; tensor var_2940_cast_fp16 = mul(x = sa_k_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2940_cast_fp16")]; tensor var_2941_cast_fp16 = mul(x = new_k_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2941_cast_fp16")]; tensor sa_k_out_19_cast_fp16 = add(x = var_2940_cast_fp16, y = var_2941_cast_fp16)[name = tensor("sa_k_out_19_cast_fp16")]; tensor sa_k_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_9_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_9_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_9_to_fp16 = cast(dtype = sa_v_in_9_to_fp16_dtype_0, x = sa_v_in_9)[name = tensor("cast_37")]; tensor var_2947_cast_fp16 = mul(x = sa_v_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor("op_2947_cast_fp16")]; tensor var_2948_cast_fp16 = mul(x = new_v_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_2948_cast_fp16")]; tensor sa_v_out_19_cast_fp16 = add(x = var_2947_cast_fp16, y = var_2948_cast_fp16)[name = tensor("sa_v_out_19_cast_fp16")]; tensor sa_v_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2967 = const()[name = tensor("op_2967"), val = tensor([0, 2, -3, -1])]; tensor var_2969_transpose_x_0 = const()[name = tensor("op_2969_transpose_x_0"), val = tensor(false)]; tensor var_2969_transpose_y_0 = const()[name = tensor("op_2969_transpose_y_0"), val = tensor(false)]; tensor transpose_132_perm_0 = const()[name = tensor("transpose_132_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_133_perm_0 = const()[name = tensor("transpose_133_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = sa_k_out_19_cast_fp16)[name = tensor("transpose_172")]; tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = q_19_cast_fp16)[name = tensor("transpose_173")]; tensor var_2969_cast_fp16 = matmul(transpose_x = var_2969_transpose_x_0, transpose_y = var_2969_transpose_y_0, x = transpose_132, y = transpose_133)[name = tensor("op_2969_cast_fp16")]; tensor var_2970_to_fp16 = const()[name = tensor("op_2970_to_fp16"), val = tensor(0x1p-3)]; tensor scores_37_cast_fp16 = mul(x = var_2969_cast_fp16, y = var_2970_to_fp16)[name = tensor("scores_37_cast_fp16")]; tensor var_2988_to_fp16 = const()[name = tensor("op_2988_to_fp16"), val = tensor(-inf)]; tensor scores_39_cast_fp16 = select(a = var_2988_to_fp16, b = scores_37_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_39_cast_fp16")]; tensor var_2990 = const()[name = tensor("op_2990"), val = tensor(-1)]; tensor probs_19_cast_fp16 = softmax(axis = var_2990, x = scores_39_cast_fp16)[name = tensor("probs_19_cast_fp16")]; tensor var_2993_transpose_x_0 = const()[name = tensor("op_2993_transpose_x_0"), val = tensor(false)]; tensor var_2993_transpose_y_0 = const()[name = tensor("op_2993_transpose_y_0"), val = tensor(false)]; tensor v_t_19_cast_fp16 = transpose(perm = var_2967, x = sa_v_out_19_cast_fp16)[name = tensor("transpose_171")]; tensor var_2993_cast_fp16 = matmul(transpose_x = var_2993_transpose_x_0, transpose_y = var_2993_transpose_y_0, x = probs_19_cast_fp16, y = v_t_19_cast_fp16)[name = tensor("op_2993_cast_fp16")]; tensor var_2998 = const()[name = tensor("op_2998"), val = tensor([0, 2, 1, 3])]; tensor var_3003 = const()[name = tensor("op_3003"), val = tensor([1, 1, -1])]; tensor var_2999_cast_fp16 = transpose(perm = var_2998, x = var_2993_cast_fp16)[name = tensor("transpose_170")]; tensor input_133_cast_fp16 = reshape(shape = var_3003, x = var_2999_cast_fp16)[name = tensor("input_133_cast_fp16")]; tensor dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69030592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69620480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_37_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor input_137_axes_0 = const()[name = tensor("input_137_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69622080)))]; tensor var_3011_to_fp16 = const()[name = tensor("op_3011_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, epsilon = var_3011_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("input_137_cast_fp16")]; tensor dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69623680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69722048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_3024 = const()[name = tensor("op_3024"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_19_cast_fp16 = reshape(shape = var_3024, x = linear_38_cast_fp16)[name = tensor("xq_proj_19_cast_fp16")]; tensor var_3042 = const()[name = tensor("op_3042"), val = tensor([0, 2, -3, -1])]; tensor xa_v_9_to_fp16_dtype_0 = const()[name = tensor("xa_v_9_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_3044_transpose_x_0 = const()[name = tensor("op_3044_transpose_x_0"), val = tensor(false)]; tensor var_3044_transpose_y_0 = const()[name = tensor("op_3044_transpose_y_0"), val = tensor(false)]; tensor xa_k_9_to_fp16_dtype_0 = const()[name = tensor("xa_k_9_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_134_perm_0 = const()[name = tensor("transpose_134_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_135_perm_0 = const()[name = tensor("transpose_135_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_9_to_fp16 = cast(dtype = xa_k_9_to_fp16_dtype_0, x = xa_k_9)[name = tensor("cast_36")]; tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = xa_k_9_to_fp16)[name = tensor("transpose_168")]; tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = xq_proj_19_cast_fp16)[name = tensor("transpose_169")]; tensor var_3044_cast_fp16 = matmul(transpose_x = var_3044_transpose_x_0, transpose_y = var_3044_transpose_y_0, x = transpose_134, y = transpose_135)[name = tensor("op_3044_cast_fp16")]; tensor var_3045_to_fp16 = const()[name = tensor("op_3045_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_37_cast_fp16 = mul(x = var_3044_cast_fp16, y = var_3045_to_fp16)[name = tensor("xscores_37_cast_fp16")]; tensor var_3063_to_fp16 = const()[name = tensor("op_3063_to_fp16"), val = tensor(-inf)]; tensor xscores_39_cast_fp16 = select(a = var_3063_to_fp16, b = xscores_37_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_39_cast_fp16")]; tensor var_3065 = const()[name = tensor("op_3065"), val = tensor(-1)]; tensor xprobs_19_cast_fp16 = softmax(axis = var_3065, x = xscores_39_cast_fp16)[name = tensor("xprobs_19_cast_fp16")]; tensor var_3068_transpose_x_0 = const()[name = tensor("op_3068_transpose_x_0"), val = tensor(false)]; tensor var_3068_transpose_y_0 = const()[name = tensor("op_3068_transpose_y_0"), val = tensor(false)]; tensor xa_v_9_to_fp16 = cast(dtype = xa_v_9_to_fp16_dtype_0, x = xa_v_9)[name = tensor("cast_35")]; tensor xvT_19_cast_fp16 = transpose(perm = var_3042, x = xa_v_9_to_fp16)[name = tensor("transpose_167")]; tensor var_3068_cast_fp16 = matmul(transpose_x = var_3068_transpose_x_0, transpose_y = var_3068_transpose_y_0, x = xprobs_19_cast_fp16, y = xvT_19_cast_fp16)[name = tensor("op_3068_cast_fp16")]; tensor var_3073 = const()[name = tensor("op_3073"), val = tensor([0, 2, 1, 3])]; tensor var_3078 = const()[name = tensor("op_3078"), val = tensor([1, 1, -1])]; tensor var_3074_cast_fp16 = transpose(perm = var_3073, x = var_3068_cast_fp16)[name = tensor("transpose_166")]; tensor input_139_cast_fp16 = reshape(shape = var_3078, x = var_3074_cast_fp16)[name = tensor("input_139_cast_fp16")]; tensor dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69722368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69820736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor input_141_cast_fp16 = add(x = input_135_cast_fp16, y = linear_39_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor x_73_axes_0 = const()[name = tensor("x_73_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69822336)))]; tensor var_3086_to_fp16 = const()[name = tensor("op_3086_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_3086_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("x_73_cast_fp16")]; tensor var_3102 = const()[name = tensor("op_3102"), val = tensor([0, 2, 1])]; tensor y_37_pad_type_0 = const()[name = tensor("y_37_pad_type_0"), val = tensor("valid")]; tensor y_37_strides_0 = const()[name = tensor("y_37_strides_0"), val = tensor([1])]; tensor y_37_pad_0 = const()[name = tensor("y_37_pad_0"), val = tensor([0, 0])]; tensor y_37_dilations_0 = const()[name = tensor("y_37_dilations_0"), val = tensor([1])]; tensor y_37_groups_0 = const()[name = tensor("y_37_groups_0"), val = tensor(1)]; tensor dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69823936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72183296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_75_cast_fp16 = transpose(perm = var_3102, x = x_73_cast_fp16)[name = tensor("transpose_165")]; tensor y_37_cast_fp16 = conv(dilations = y_37_dilations_0, groups = y_37_groups_0, pad = y_37_pad_0, pad_type = y_37_pad_type_0, strides = y_37_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = tensor("y_37_cast_fp16")]; tensor x_77_mode_0 = const()[name = tensor("x_77_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_37_cast_fp16)[name = tensor("x_77_cast_fp16")]; tensor y_39_pad_type_0 = const()[name = tensor("y_39_pad_type_0"), val = tensor("valid")]; tensor y_39_strides_0 = const()[name = tensor("y_39_strides_0"), val = tensor([1])]; tensor y_39_pad_0 = const()[name = tensor("y_39_pad_0"), val = tensor([0, 0])]; tensor y_39_dilations_0 = const()[name = tensor("y_39_dilations_0"), val = tensor([1])]; tensor y_39_groups_0 = const()[name = tensor("y_39_groups_0"), val = tensor(1)]; tensor dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72189504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74548864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = tensor("y_39_cast_fp16")]; tensor var_3120 = const()[name = tensor("op_3120"), val = tensor([0, 2, 1])]; tensor var_3121_cast_fp16 = transpose(perm = var_3120, x = y_39_cast_fp16)[name = tensor("transpose_164")]; tensor input_143_cast_fp16 = add(x = input_141_cast_fp16, y = var_3121_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor input_145_axes_0 = const()[name = tensor("input_145_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74550464)))]; tensor var_3125_to_fp16 = const()[name = tensor("op_3125_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, epsilon = var_3125_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("input_145_cast_fp16")]; tensor dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74552064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76321600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_3139 = const()[name = tensor("op_3139"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_43_cast_fp16 = reshape(shape = var_3139, x = linear_40_cast_fp16)[name = tensor("qkv_43_cast_fp16")]; tensor q_21_begin_0 = const()[name = tensor("q_21_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_21_end_0 = const()[name = tensor("q_21_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_21_end_mask_0 = const()[name = tensor("q_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_21_squeeze_mask_0 = const()[name = tensor("q_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("q_21_cast_fp16")]; tensor new_k_21_begin_0 = const()[name = tensor("new_k_21_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_21_end_0 = const()[name = tensor("new_k_21_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_21_end_mask_0 = const()[name = tensor("new_k_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_21_squeeze_mask_0 = const()[name = tensor("new_k_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("new_k_21_cast_fp16")]; tensor new_v_21_begin_0 = const()[name = tensor("new_v_21_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_21_end_0 = const()[name = tensor("new_v_21_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_21_end_mask_0 = const()[name = tensor("new_v_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_21_squeeze_mask_0 = const()[name = tensor("new_v_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("new_v_21_cast_fp16")]; tensor sa_k_in_10_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_10_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_10_to_fp16 = cast(dtype = sa_k_in_10_to_fp16_dtype_0, x = sa_k_in_10)[name = tensor("cast_34")]; tensor var_3200_cast_fp16 = mul(x = sa_k_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor("op_3200_cast_fp16")]; tensor var_3201_cast_fp16 = mul(x = new_k_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_3201_cast_fp16")]; tensor sa_k_out_21_cast_fp16 = add(x = var_3200_cast_fp16, y = var_3201_cast_fp16)[name = tensor("sa_k_out_21_cast_fp16")]; tensor sa_k_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_10_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_10_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_10_to_fp16 = cast(dtype = sa_v_in_10_to_fp16_dtype_0, x = sa_v_in_10)[name = tensor("cast_33")]; tensor var_3207_cast_fp16 = mul(x = sa_v_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor("op_3207_cast_fp16")]; tensor var_3208_cast_fp16 = mul(x = new_v_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_3208_cast_fp16")]; tensor sa_v_out_21_cast_fp16 = add(x = var_3207_cast_fp16, y = var_3208_cast_fp16)[name = tensor("sa_v_out_21_cast_fp16")]; tensor sa_v_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_3227 = const()[name = tensor("op_3227"), val = tensor([0, 2, -3, -1])]; tensor var_3229_transpose_x_0 = const()[name = tensor("op_3229_transpose_x_0"), val = tensor(false)]; tensor var_3229_transpose_y_0 = const()[name = tensor("op_3229_transpose_y_0"), val = tensor(false)]; tensor transpose_136_perm_0 = const()[name = tensor("transpose_136_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_137_perm_0 = const()[name = tensor("transpose_137_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = sa_k_out_21_cast_fp16)[name = tensor("transpose_162")]; tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = q_21_cast_fp16)[name = tensor("transpose_163")]; tensor var_3229_cast_fp16 = matmul(transpose_x = var_3229_transpose_x_0, transpose_y = var_3229_transpose_y_0, x = transpose_136, y = transpose_137)[name = tensor("op_3229_cast_fp16")]; tensor var_3230_to_fp16 = const()[name = tensor("op_3230_to_fp16"), val = tensor(0x1p-3)]; tensor scores_41_cast_fp16 = mul(x = var_3229_cast_fp16, y = var_3230_to_fp16)[name = tensor("scores_41_cast_fp16")]; tensor var_3248_to_fp16 = const()[name = tensor("op_3248_to_fp16"), val = tensor(-inf)]; tensor scores_43_cast_fp16 = select(a = var_3248_to_fp16, b = scores_41_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_43_cast_fp16")]; tensor var_3250 = const()[name = tensor("op_3250"), val = tensor(-1)]; tensor probs_21_cast_fp16 = softmax(axis = var_3250, x = scores_43_cast_fp16)[name = tensor("probs_21_cast_fp16")]; tensor var_3253_transpose_x_0 = const()[name = tensor("op_3253_transpose_x_0"), val = tensor(false)]; tensor var_3253_transpose_y_0 = const()[name = tensor("op_3253_transpose_y_0"), val = tensor(false)]; tensor v_t_21_cast_fp16 = transpose(perm = var_3227, x = sa_v_out_21_cast_fp16)[name = tensor("transpose_161")]; tensor var_3253_cast_fp16 = matmul(transpose_x = var_3253_transpose_x_0, transpose_y = var_3253_transpose_y_0, x = probs_21_cast_fp16, y = v_t_21_cast_fp16)[name = tensor("op_3253_cast_fp16")]; tensor var_3258 = const()[name = tensor("op_3258"), val = tensor([0, 2, 1, 3])]; tensor var_3263 = const()[name = tensor("op_3263"), val = tensor([1, 1, -1])]; tensor var_3259_cast_fp16 = transpose(perm = var_3258, x = var_3253_cast_fp16)[name = tensor("transpose_160")]; tensor input_147_cast_fp16 = reshape(shape = var_3263, x = var_3259_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76326272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76916160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_41_cast_fp16)[name = tensor("input_149_cast_fp16")]; tensor input_151_axes_0 = const()[name = tensor("input_151_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76917760)))]; tensor var_3271_to_fp16 = const()[name = tensor("op_3271_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_151_cast_fp16 = layer_norm(axes = input_151_axes_0, epsilon = var_3271_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_149_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76919360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77017728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = tensor("linear_42_cast_fp16")]; tensor var_3284 = const()[name = tensor("op_3284"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_21_cast_fp16 = reshape(shape = var_3284, x = linear_42_cast_fp16)[name = tensor("xq_proj_21_cast_fp16")]; tensor var_3302 = const()[name = tensor("op_3302"), val = tensor([0, 2, -3, -1])]; tensor xa_v_10_to_fp16_dtype_0 = const()[name = tensor("xa_v_10_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_3304_transpose_x_0 = const()[name = tensor("op_3304_transpose_x_0"), val = tensor(false)]; tensor var_3304_transpose_y_0 = const()[name = tensor("op_3304_transpose_y_0"), val = tensor(false)]; tensor xa_k_10_to_fp16_dtype_0 = const()[name = tensor("xa_k_10_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_138_perm_0 = const()[name = tensor("transpose_138_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_139_perm_0 = const()[name = tensor("transpose_139_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_10_to_fp16 = cast(dtype = xa_k_10_to_fp16_dtype_0, x = xa_k_10)[name = tensor("cast_32")]; tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = xa_k_10_to_fp16)[name = tensor("transpose_158")]; tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = xq_proj_21_cast_fp16)[name = tensor("transpose_159")]; tensor var_3304_cast_fp16 = matmul(transpose_x = var_3304_transpose_x_0, transpose_y = var_3304_transpose_y_0, x = transpose_138, y = transpose_139)[name = tensor("op_3304_cast_fp16")]; tensor var_3305_to_fp16 = const()[name = tensor("op_3305_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_41_cast_fp16 = mul(x = var_3304_cast_fp16, y = var_3305_to_fp16)[name = tensor("xscores_41_cast_fp16")]; tensor var_3323_to_fp16 = const()[name = tensor("op_3323_to_fp16"), val = tensor(-inf)]; tensor xscores_43_cast_fp16 = select(a = var_3323_to_fp16, b = xscores_41_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_43_cast_fp16")]; tensor var_3325 = const()[name = tensor("op_3325"), val = tensor(-1)]; tensor xprobs_21_cast_fp16 = softmax(axis = var_3325, x = xscores_43_cast_fp16)[name = tensor("xprobs_21_cast_fp16")]; tensor var_3328_transpose_x_0 = const()[name = tensor("op_3328_transpose_x_0"), val = tensor(false)]; tensor var_3328_transpose_y_0 = const()[name = tensor("op_3328_transpose_y_0"), val = tensor(false)]; tensor xa_v_10_to_fp16 = cast(dtype = xa_v_10_to_fp16_dtype_0, x = xa_v_10)[name = tensor("cast_31")]; tensor xvT_21_cast_fp16 = transpose(perm = var_3302, x = xa_v_10_to_fp16)[name = tensor("transpose_157")]; tensor var_3328_cast_fp16 = matmul(transpose_x = var_3328_transpose_x_0, transpose_y = var_3328_transpose_y_0, x = xprobs_21_cast_fp16, y = xvT_21_cast_fp16)[name = tensor("op_3328_cast_fp16")]; tensor var_3333 = const()[name = tensor("op_3333"), val = tensor([0, 2, 1, 3])]; tensor var_3338 = const()[name = tensor("op_3338"), val = tensor([1, 1, -1])]; tensor var_3334_cast_fp16 = transpose(perm = var_3333, x = var_3328_cast_fp16)[name = tensor("transpose_156")]; tensor input_153_cast_fp16 = reshape(shape = var_3338, x = var_3334_cast_fp16)[name = tensor("input_153_cast_fp16")]; tensor dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77018048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77116416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_43_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor x_81_axes_0 = const()[name = tensor("x_81_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77118016)))]; tensor var_3346_to_fp16 = const()[name = tensor("op_3346_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_3346_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("x_81_cast_fp16")]; tensor var_3362 = const()[name = tensor("op_3362"), val = tensor([0, 2, 1])]; tensor y_41_pad_type_0 = const()[name = tensor("y_41_pad_type_0"), val = tensor("valid")]; tensor y_41_strides_0 = const()[name = tensor("y_41_strides_0"), val = tensor([1])]; tensor y_41_pad_0 = const()[name = tensor("y_41_pad_0"), val = tensor([0, 0])]; tensor y_41_dilations_0 = const()[name = tensor("y_41_dilations_0"), val = tensor([1])]; tensor y_41_groups_0 = const()[name = tensor("y_41_groups_0"), val = tensor(1)]; tensor dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77119616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79478976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_83_cast_fp16 = transpose(perm = var_3362, x = x_81_cast_fp16)[name = tensor("transpose_155")]; tensor y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_83_cast_fp16)[name = tensor("y_41_cast_fp16")]; tensor x_85_mode_0 = const()[name = tensor("x_85_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_85_cast_fp16 = gelu(mode = x_85_mode_0, x = y_41_cast_fp16)[name = tensor("x_85_cast_fp16")]; tensor y_43_pad_type_0 = const()[name = tensor("y_43_pad_type_0"), val = tensor("valid")]; tensor y_43_strides_0 = const()[name = tensor("y_43_strides_0"), val = tensor([1])]; tensor y_43_pad_0 = const()[name = tensor("y_43_pad_0"), val = tensor([0, 0])]; tensor y_43_dilations_0 = const()[name = tensor("y_43_dilations_0"), val = tensor([1])]; tensor y_43_groups_0 = const()[name = tensor("y_43_groups_0"), val = tensor(1)]; tensor dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79485184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81844544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_43_cast_fp16 = conv(dilations = y_43_dilations_0, groups = y_43_groups_0, pad = y_43_pad_0, pad_type = y_43_pad_type_0, strides = y_43_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = tensor("y_43_cast_fp16")]; tensor var_3380 = const()[name = tensor("op_3380"), val = tensor([0, 2, 1])]; tensor var_3381_cast_fp16 = transpose(perm = var_3380, x = y_43_cast_fp16)[name = tensor("transpose_154")]; tensor input_157_cast_fp16 = add(x = input_155_cast_fp16, y = var_3381_cast_fp16)[name = tensor("input_157_cast_fp16")]; tensor input_159_axes_0 = const()[name = tensor("input_159_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81846144)))]; tensor var_3385_to_fp16 = const()[name = tensor("op_3385_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, epsilon = var_3385_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; tensor dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81847744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83617280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3351680)))]; tensor linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor var_3399 = const()[name = tensor("op_3399"), val = tensor([1, 1, 3, 12, 64])]; tensor qkv_cast_fp16 = reshape(shape = var_3399, x = linear_44_cast_fp16)[name = tensor("qkv_cast_fp16")]; tensor q_begin_0 = const()[name = tensor("q_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_end_0 = const()[name = tensor("q_end_0"), val = tensor([1, 1, 1, 12, 64])]; tensor q_end_mask_0 = const()[name = tensor("q_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_squeeze_mask_0 = const()[name = tensor("q_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_cast_fp16 = slice_by_index(begin = q_begin_0, end = q_end_0, end_mask = q_end_mask_0, squeeze_mask = q_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("q_cast_fp16")]; tensor new_k_begin_0 = const()[name = tensor("new_k_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_end_0 = const()[name = tensor("new_k_end_0"), val = tensor([1, 1, 2, 12, 64])]; tensor new_k_end_mask_0 = const()[name = tensor("new_k_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_squeeze_mask_0 = const()[name = tensor("new_k_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("new_k_cast_fp16")]; tensor new_v_begin_0 = const()[name = tensor("new_v_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_end_0 = const()[name = tensor("new_v_end_0"), val = tensor([1, 1, 3, 12, 64])]; tensor new_v_end_mask_0 = const()[name = tensor("new_v_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_squeeze_mask_0 = const()[name = tensor("new_v_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("new_v_cast_fp16")]; tensor sa_k_in_11_to_fp16_dtype_0 = const()[name = tensor("sa_k_in_11_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_k_in_11_to_fp16 = cast(dtype = sa_k_in_11_to_fp16_dtype_0, x = sa_k_in_11)[name = tensor("cast_30")]; tensor var_3460_cast_fp16 = mul(x = sa_k_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor("op_3460_cast_fp16")]; tensor var_3461_cast_fp16 = mul(x = new_k_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_3461_cast_fp16")]; tensor sa_k_out_cast_fp16 = add(x = var_3460_cast_fp16, y = var_3461_cast_fp16)[name = tensor("sa_k_out_cast_fp16")]; tensor sa_k_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_k_out_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_v_in_11_to_fp16_dtype_0 = const()[name = tensor("sa_v_in_11_to_fp16_dtype_0"), val = tensor("fp16")]; tensor sa_v_in_11_to_fp16 = cast(dtype = sa_v_in_11_to_fp16_dtype_0, x = sa_v_in_11)[name = tensor("cast_29")]; tensor var_3467_cast_fp16 = mul(x = sa_v_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor("op_3467_cast_fp16")]; tensor var_3468_cast_fp16 = mul(x = new_v_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor("op_3468_cast_fp16")]; tensor sa_v_out_cast_fp16 = add(x = var_3467_cast_fp16, y = var_3468_cast_fp16)[name = tensor("sa_v_out_cast_fp16")]; tensor sa_v_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("sa_v_out_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_3487 = const()[name = tensor("op_3487"), val = tensor([0, 2, -3, -1])]; tensor var_3489_transpose_x_0 = const()[name = tensor("op_3489_transpose_x_0"), val = tensor(false)]; tensor var_3489_transpose_y_0 = const()[name = tensor("op_3489_transpose_y_0"), val = tensor(false)]; tensor transpose_140_perm_0 = const()[name = tensor("transpose_140_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_141_perm_0 = const()[name = tensor("transpose_141_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = sa_k_out_cast_fp16)[name = tensor("transpose_152")]; tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = q_cast_fp16)[name = tensor("transpose_153")]; tensor var_3489_cast_fp16 = matmul(transpose_x = var_3489_transpose_x_0, transpose_y = var_3489_transpose_y_0, x = transpose_140, y = transpose_141)[name = tensor("op_3489_cast_fp16")]; tensor var_3490_to_fp16 = const()[name = tensor("op_3490_to_fp16"), val = tensor(0x1p-3)]; tensor scores_45_cast_fp16 = mul(x = var_3489_cast_fp16, y = var_3490_to_fp16)[name = tensor("scores_45_cast_fp16")]; tensor var_3508_to_fp16 = const()[name = tensor("op_3508_to_fp16"), val = tensor(-inf)]; tensor scores_cast_fp16 = select(a = var_3508_to_fp16, b = scores_45_cast_fp16, cond = var_647_cast_fp16)[name = tensor("scores_cast_fp16")]; tensor var_3510 = const()[name = tensor("op_3510"), val = tensor(-1)]; tensor probs_cast_fp16 = softmax(axis = var_3510, x = scores_cast_fp16)[name = tensor("probs_cast_fp16")]; tensor var_3513_transpose_x_0 = const()[name = tensor("op_3513_transpose_x_0"), val = tensor(false)]; tensor var_3513_transpose_y_0 = const()[name = tensor("op_3513_transpose_y_0"), val = tensor(false)]; tensor v_t_cast_fp16 = transpose(perm = var_3487, x = sa_v_out_cast_fp16)[name = tensor("transpose_151")]; tensor var_3513_cast_fp16 = matmul(transpose_x = var_3513_transpose_x_0, transpose_y = var_3513_transpose_y_0, x = probs_cast_fp16, y = v_t_cast_fp16)[name = tensor("op_3513_cast_fp16")]; tensor var_3518 = const()[name = tensor("op_3518"), val = tensor([0, 2, 1, 3])]; tensor var_3523 = const()[name = tensor("op_3523"), val = tensor([1, 1, -1])]; tensor var_3519_cast_fp16 = transpose(perm = var_3518, x = var_3513_cast_fp16)[name = tensor("transpose_150")]; tensor input_161_cast_fp16 = reshape(shape = var_3523, x = var_3519_cast_fp16)[name = tensor("input_161_cast_fp16")]; tensor dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83621952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84211840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor input_163_cast_fp16 = add(x = input_157_cast_fp16, y = linear_45_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor input_165_axes_0 = const()[name = tensor("input_165_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84213440)))]; tensor var_3531_to_fp16 = const()[name = tensor("op_3531_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_165_cast_fp16 = layer_norm(axes = input_165_axes_0, epsilon = var_3531_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84215040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84313408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4057280)))]; tensor linear_46_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor var_3544 = const()[name = tensor("op_3544"), val = tensor([1, 1, 1, 128])]; tensor xq_proj_cast_fp16 = reshape(shape = var_3544, x = linear_46_cast_fp16)[name = tensor("xq_proj_cast_fp16")]; tensor var_3562 = const()[name = tensor("op_3562"), val = tensor([0, 2, -3, -1])]; tensor xa_v_11_to_fp16_dtype_0 = const()[name = tensor("xa_v_11_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_3564_transpose_x_0 = const()[name = tensor("op_3564_transpose_x_0"), val = tensor(false)]; tensor var_3564_transpose_y_0 = const()[name = tensor("op_3564_transpose_y_0"), val = tensor(false)]; tensor xa_k_11_to_fp16_dtype_0 = const()[name = tensor("xa_k_11_to_fp16_dtype_0"), val = tensor("fp16")]; tensor transpose_142_perm_0 = const()[name = tensor("transpose_142_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_143_perm_0 = const()[name = tensor("transpose_143_perm_0"), val = tensor([0, 2, -1, -3])]; tensor xa_k_11_to_fp16 = cast(dtype = xa_k_11_to_fp16_dtype_0, x = xa_k_11)[name = tensor("cast_28")]; tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = xa_k_11_to_fp16)[name = tensor("transpose_148")]; tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = xq_proj_cast_fp16)[name = tensor("transpose_149")]; tensor var_3564_cast_fp16 = matmul(transpose_x = var_3564_transpose_x_0, transpose_y = var_3564_transpose_y_0, x = transpose_142, y = transpose_143)[name = tensor("op_3564_cast_fp16")]; tensor var_3565_to_fp16 = const()[name = tensor("op_3565_to_fp16"), val = tensor(0x1.6ap-4)]; tensor xscores_45_cast_fp16 = mul(x = var_3564_cast_fp16, y = var_3565_to_fp16)[name = tensor("xscores_45_cast_fp16")]; tensor var_3583_to_fp16 = const()[name = tensor("op_3583_to_fp16"), val = tensor(-inf)]; tensor xscores_cast_fp16 = select(a = var_3583_to_fp16, b = xscores_45_cast_fp16, cond = var_722_cast_fp16)[name = tensor("xscores_cast_fp16")]; tensor var_3585 = const()[name = tensor("op_3585"), val = tensor(-1)]; tensor xprobs_cast_fp16 = softmax(axis = var_3585, x = xscores_cast_fp16)[name = tensor("xprobs_cast_fp16")]; tensor var_3588_transpose_x_0 = const()[name = tensor("op_3588_transpose_x_0"), val = tensor(false)]; tensor var_3588_transpose_y_0 = const()[name = tensor("op_3588_transpose_y_0"), val = tensor(false)]; tensor xa_v_11_to_fp16 = cast(dtype = xa_v_11_to_fp16_dtype_0, x = xa_v_11)[name = tensor("cast_27")]; tensor xvT_cast_fp16 = transpose(perm = var_3562, x = xa_v_11_to_fp16)[name = tensor("transpose_147")]; tensor var_3588_cast_fp16 = matmul(transpose_x = var_3588_transpose_x_0, transpose_y = var_3588_transpose_y_0, x = xprobs_cast_fp16, y = xvT_cast_fp16)[name = tensor("op_3588_cast_fp16")]; tensor var_3593 = const()[name = tensor("op_3593"), val = tensor([0, 2, 1, 3])]; tensor var_3598 = const()[name = tensor("op_3598"), val = tensor([1, 1, -1])]; tensor var_3594_cast_fp16 = transpose(perm = var_3593, x = var_3588_cast_fp16)[name = tensor("transpose_146")]; tensor input_167_cast_fp16 = reshape(shape = var_3598, x = var_3594_cast_fp16)[name = tensor("input_167_cast_fp16")]; tensor dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84313728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84412096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor linear_47_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_47_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor x_89_axes_0 = const()[name = tensor("x_89_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84413696)))]; tensor var_3606_to_fp16 = const()[name = tensor("op_3606_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_89_cast_fp16 = layer_norm(axes = x_89_axes_0, epsilon = var_3606_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("x_89_cast_fp16")]; tensor var_3622 = const()[name = tensor("op_3622"), val = tensor([0, 2, 1])]; tensor y_45_pad_type_0 = const()[name = tensor("y_45_pad_type_0"), val = tensor("valid")]; tensor y_45_strides_0 = const()[name = tensor("y_45_strides_0"), val = tensor([1])]; tensor y_45_pad_0 = const()[name = tensor("y_45_pad_0"), val = tensor([0, 0])]; tensor y_45_dilations_0 = const()[name = tensor("y_45_dilations_0"), val = tensor([1])]; tensor y_45_groups_0 = const()[name = tensor("y_45_groups_0"), val = tensor(1)]; tensor dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84415296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86774656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6519040)))]; tensor x_91_cast_fp16 = transpose(perm = var_3622, x = x_89_cast_fp16)[name = tensor("transpose_145")]; tensor y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = tensor("y_45_cast_fp16")]; tensor x_93_mode_0 = const()[name = tensor("x_93_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_45_cast_fp16)[name = tensor("x_93_cast_fp16")]; tensor y_pad_type_0 = const()[name = tensor("y_pad_type_0"), val = tensor("valid")]; tensor y_strides_0 = const()[name = tensor("y_strides_0"), val = tensor([1])]; tensor y_pad_0 = const()[name = tensor("y_pad_0"), val = tensor([0, 0])]; tensor y_dilations_0 = const()[name = tensor("y_dilations_0"), val = tensor([1])]; tensor y_groups_0 = const()[name = tensor("y_groups_0"), val = tensor(1)]; tensor dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86780864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89140224))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3953280)))]; tensor y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = tensor("y_cast_fp16")]; tensor var_3640 = const()[name = tensor("op_3640"), val = tensor([0, 2, 1])]; tensor var_3641_cast_fp16 = transpose(perm = var_3640, x = y_cast_fp16)[name = tensor("transpose_144")]; tensor input_171_cast_fp16 = add(x = input_169_cast_fp16, y = var_3641_cast_fp16)[name = tensor("input_171_cast_fp16")]; tensor input_axes_0 = const()[name = tensor("input_axes_0"), val = tensor([-1])]; tensor dec_norm_out_weight_to_fp16 = const()[name = tensor("dec_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89141824)))]; tensor var_3645_to_fp16 = const()[name = tensor("op_3645_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_cast_fp16 = layer_norm(axes = input_axes_0, epsilon = var_3645_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = input_171_cast_fp16)[name = tensor("input_cast_fp16")]; tensor input_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("input_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor dec_final_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_final_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89143424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101595200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101578944)))]; tensor dec_final_proj_bias_to_fp16 = const()[name = tensor("dec_final_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101627648)))]; tensor linear_48_cast_fp16 = linear(bias = dec_final_proj_bias_to_fp16, weight = dec_final_proj_weight_to_fp16_quantized, x = input_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_3658 = const()[name = tensor("op_3658"), val = tensor([1, 1, 8, 2024])]; tensor var_3659_cast_fp16 = reshape(shape = var_3658, x = linear_48_cast_fp16)[name = tensor("op_3659_cast_fp16")]; tensor var_3659_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_3659_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_k_out_0 = cast(dtype = sa_k_out_1_cast_fp16_to_fp32_dtype_0, x = sa_k_out_1_cast_fp16)[name = tensor("cast_1")]; tensor sa_v_out_0 = cast(dtype = sa_v_out_1_cast_fp16_to_fp32_dtype_0, x = sa_v_out_1_cast_fp16)[name = tensor("cast_2")]; tensor sa_k_out_1 = cast(dtype = sa_k_out_3_cast_fp16_to_fp32_dtype_0, x = sa_k_out_3_cast_fp16)[name = tensor("cast_3")]; tensor sa_v_out_1 = cast(dtype = sa_v_out_3_cast_fp16_to_fp32_dtype_0, x = sa_v_out_3_cast_fp16)[name = tensor("cast_4")]; tensor sa_k_out_2 = cast(dtype = sa_k_out_5_cast_fp16_to_fp32_dtype_0, x = sa_k_out_5_cast_fp16)[name = tensor("cast_5")]; tensor sa_v_out_2 = cast(dtype = sa_v_out_5_cast_fp16_to_fp32_dtype_0, x = sa_v_out_5_cast_fp16)[name = tensor("cast_6")]; tensor sa_k_out_3 = cast(dtype = sa_k_out_7_cast_fp16_to_fp32_dtype_0, x = sa_k_out_7_cast_fp16)[name = tensor("cast_7")]; tensor sa_v_out_3 = cast(dtype = sa_v_out_7_cast_fp16_to_fp32_dtype_0, x = sa_v_out_7_cast_fp16)[name = tensor("cast_8")]; tensor sa_k_out_4 = cast(dtype = sa_k_out_9_cast_fp16_to_fp32_dtype_0, x = sa_k_out_9_cast_fp16)[name = tensor("cast_9")]; tensor sa_v_out_4 = cast(dtype = sa_v_out_9_cast_fp16_to_fp32_dtype_0, x = sa_v_out_9_cast_fp16)[name = tensor("cast_10")]; tensor sa_k_out_5 = cast(dtype = sa_k_out_11_cast_fp16_to_fp32_dtype_0, x = sa_k_out_11_cast_fp16)[name = tensor("cast_11")]; tensor sa_v_out_5 = cast(dtype = sa_v_out_11_cast_fp16_to_fp32_dtype_0, x = sa_v_out_11_cast_fp16)[name = tensor("cast_12")]; tensor sa_k_out_6 = cast(dtype = sa_k_out_13_cast_fp16_to_fp32_dtype_0, x = sa_k_out_13_cast_fp16)[name = tensor("cast_13")]; tensor sa_v_out_6 = cast(dtype = sa_v_out_13_cast_fp16_to_fp32_dtype_0, x = sa_v_out_13_cast_fp16)[name = tensor("cast_14")]; tensor sa_k_out_7 = cast(dtype = sa_k_out_15_cast_fp16_to_fp32_dtype_0, x = sa_k_out_15_cast_fp16)[name = tensor("cast_15")]; tensor sa_v_out_7 = cast(dtype = sa_v_out_15_cast_fp16_to_fp32_dtype_0, x = sa_v_out_15_cast_fp16)[name = tensor("cast_16")]; tensor sa_k_out_8 = cast(dtype = sa_k_out_17_cast_fp16_to_fp32_dtype_0, x = sa_k_out_17_cast_fp16)[name = tensor("cast_17")]; tensor sa_v_out_8 = cast(dtype = sa_v_out_17_cast_fp16_to_fp32_dtype_0, x = sa_v_out_17_cast_fp16)[name = tensor("cast_18")]; tensor sa_k_out_9 = cast(dtype = sa_k_out_19_cast_fp16_to_fp32_dtype_0, x = sa_k_out_19_cast_fp16)[name = tensor("cast_19")]; tensor sa_v_out_9 = cast(dtype = sa_v_out_19_cast_fp16_to_fp32_dtype_0, x = sa_v_out_19_cast_fp16)[name = tensor("cast_20")]; tensor sa_k_out_10 = cast(dtype = sa_k_out_21_cast_fp16_to_fp32_dtype_0, x = sa_k_out_21_cast_fp16)[name = tensor("cast_21")]; tensor sa_v_out_10 = cast(dtype = sa_v_out_21_cast_fp16_to_fp32_dtype_0, x = sa_v_out_21_cast_fp16)[name = tensor("cast_22")]; tensor sa_k_out_11 = cast(dtype = sa_k_out_cast_fp16_to_fp32_dtype_0, x = sa_k_out_cast_fp16)[name = tensor("cast_23")]; tensor sa_v_out_11 = cast(dtype = sa_v_out_cast_fp16_to_fp32_dtype_0, x = sa_v_out_cast_fp16)[name = tensor("cast_24")]; tensor h_last = cast(dtype = input_cast_fp16_to_fp32_dtype_0, x = input_cast_fp16)[name = tensor("cast_25")]; tensor logits = cast(dtype = var_3659_cast_fp16_to_fp32_dtype_0, x = var_3659_cast_fp16)[name = tensor("cast_26")]; tensor encoder_output_tmp = identity(x = encoder_output)[name = tensor("encoder_output_tmp")]; } -> (logits, h_last, sa_k_out_0, sa_k_out_1, sa_k_out_2, sa_k_out_3, sa_k_out_4, sa_k_out_5, sa_k_out_6, sa_k_out_7, sa_k_out_8, sa_k_out_9, sa_k_out_10, sa_k_out_11, sa_v_out_0, sa_v_out_1, sa_v_out_2, sa_v_out_3, sa_v_out_4, sa_v_out_5, sa_v_out_6, sa_v_out_7, sa_v_out_8, sa_v_out_9, sa_v_out_10, sa_v_out_11); }