Magpie-TTS-Multilingual-357M-CoreML-8bit

File size: 301,906 Bytes

ca600aa

program(1.0)
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})]
{
    func main<ios17>(tensor<fp32, [1, 1, 768]> audio_emb, tensor<fp32, [1, 256]> encoder_mask, tensor<fp32, [1, 256, 768]> encoder_output, tensor<int32, [1]> position, tensor<fp32, [1, 600, 12, 64]> sa_k_in_0, tensor<fp32, [1, 600, 12, 64]> sa_k_in_1, tensor<fp32, [1, 600, 12, 64]> sa_k_in_10, tensor<fp32, [1, 600, 12, 64]> sa_k_in_11, tensor<fp32, [1, 600, 12, 64]> sa_k_in_2, tensor<fp32, [1, 600, 12, 64]> sa_k_in_3, tensor<fp32, [1, 600, 12, 64]> sa_k_in_4, tensor<fp32, [1, 600, 12, 64]> sa_k_in_5, tensor<fp32, [1, 600, 12, 64]> sa_k_in_6, tensor<fp32, [1, 600, 12, 64]> sa_k_in_7, tensor<fp32, [1, 600, 12, 64]> sa_k_in_8, tensor<fp32, [1, 600, 12, 64]> sa_k_in_9, tensor<fp32, [1, 600, 12, 64]> sa_v_in_0, tensor<fp32, [1, 600, 12, 64]> sa_v_in_1, tensor<fp32, [1, 600, 12, 64]> sa_v_in_10, tensor<fp32, [1, 600, 12, 64]> sa_v_in_11, tensor<fp32, [1, 600, 12, 64]> sa_v_in_2, tensor<fp32, [1, 600, 12, 64]> sa_v_in_3, tensor<fp32, [1, 600, 12, 64]> sa_v_in_4, tensor<fp32, [1, 600, 12, 64]> sa_v_in_5, tensor<fp32, [1, 600, 12, 64]> sa_v_in_6, tensor<fp32, [1, 600, 12, 64]> sa_v_in_7, tensor<fp32, [1, 600, 12, 64]> sa_v_in_8, tensor<fp32, [1, 600, 12, 64]> sa_v_in_9, tensor<fp32, [1, 256, 1, 128]> xa_k_0, tensor<fp32, [1, 256, 1, 128]> xa_k_1, tensor<fp32, [1, 256, 1, 128]> xa_k_10, tensor<fp32, [1, 256, 1, 128]> xa_k_11, tensor<fp32, [1, 256, 1, 128]> xa_k_2, tensor<fp32, [1, 256, 1, 128]> xa_k_3, tensor<fp32, [1, 256, 1, 128]> xa_k_4, tensor<fp32, [1, 256, 1, 128]> xa_k_5, tensor<fp32, [1, 256, 1, 128]> xa_k_6, tensor<fp32, [1, 256, 1, 128]> xa_k_7, tensor<fp32, [1, 256, 1, 128]> xa_k_8, tensor<fp32, [1, 256, 1, 128]> xa_k_9, tensor<fp32, [1, 256, 1, 128]> xa_v_0, tensor<fp32, [1, 256, 1, 128]> xa_v_1, tensor<fp32, [1, 256, 1, 128]> xa_v_10, tensor<fp32, [1, 256, 1, 128]> xa_v_11, tensor<fp32, [1, 256, 1, 128]> xa_v_2, tensor<fp32, [1, 256, 1, 128]> xa_v_3, tensor<fp32, [1, 256, 1, 128]> xa_v_4, tensor<fp32, [1, 256, 1, 128]> xa_v_5, tensor<fp32, [1, 256, 1, 128]> xa_v_6, tensor<fp32, [1, 256, 1, 128]> xa_v_7, tensor<fp32, [1, 256, 1, 128]> xa_v_8, tensor<fp32, [1, 256, 1, 128]> xa_v_9) {
            tensor<int32, []> var_502_batch_dims_0 = const()[name = tensor<string, []>("op_502_batch_dims_0"), val = tensor<int32, []>(0)];
            tensor<bool, []> var_502_validate_indices_0 = const()[name = tensor<string, []>("op_502_validate_indices_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [2048, 768]> dec_position_embeddings_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_position_embeddings_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2048, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), scale = tensor<fp16, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1575104))), zero_point = tensor<int8, [2048]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1572992)))];
            tensor<string, []> position_to_int16_dtype_0 = const()[name = tensor<string, []>("position_to_int16_dtype_0"), val = tensor<string, []>("int16")];
            tensor<string, []> cast_63_dtype_0 = const()[name = tensor<string, []>("cast_63_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> greater_equal_0_y_0 = const()[name = tensor<string, []>("greater_equal_0_y_0"), val = tensor<int32, []>(0)];
            tensor<int16, [1]> position_to_int16 = cast(dtype = position_to_int16_dtype_0, x = position)[name = tensor<string, []>("cast_83")];
            tensor<int32, [1]> cast_63 = cast(dtype = cast_63_dtype_0, x = position_to_int16)[name = tensor<string, []>("cast_82")];
            tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_63, y = greater_equal_0_y_0)[name = tensor<string, []>("greater_equal_0")];
            tensor<int32, []> slice_by_index_0 = const()[name = tensor<string, []>("slice_by_index_0"), val = tensor<int32, []>(2048)];
            tensor<int32, [1]> add_0 = add(x = cast_63, y = slice_by_index_0)[name = tensor<string, []>("add_0")];
            tensor<int32, [1]> select_0 = select(a = cast_63, b = add_0, cond = greater_equal_0)[name = tensor<string, []>("select_0")];
            tensor<string, []> select_0_to_int16_dtype_0 = const()[name = tensor<string, []>("select_0_to_int16_dtype_0"), val = tensor<string, []>("int16")];
            tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("int32")];
            tensor<int32, []> greater_equal_0_y_0_1 = const()[name = tensor<string, []>("greater_equal_0_y_0_1"), val = tensor<int32, []>(0)];
            tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor<string, []>("cast_81")];
            tensor<int32, [1]> cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor<string, []>("cast_80")];
            tensor<bool, [1]> greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor<string, []>("greater_equal_0_1")];
            tensor<int32, []> slice_by_index_0_1 = const()[name = tensor<string, []>("slice_by_index_0_1"), val = tensor<int32, []>(2048)];
            tensor<int32, [1]> add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor<string, []>("add_0_1")];
            tensor<int32, [1]> select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor<string, []>("select_0_1")];
            tensor<int32, []> op_502_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor<string, []>("op_502_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor<int32, []>(0)];
            tensor<fp16, [1, 768]> op_502_cast_fp16_cast_uint16_cast_uint16 = gather(axis = op_502_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = var_502_batch_dims_0, indices = select_0_1, validate_indices = var_502_validate_indices_0, x = dec_position_embeddings_weight_to_fp16_quantized)[name = tensor<string, []>("op_502_cast_fp16_cast_uint16_cast_uint16")];
            tensor<string, []> audio_emb_to_fp16_dtype_0 = const()[name = tensor<string, []>("audio_emb_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 1, 768]> audio_emb_to_fp16 = cast(dtype = audio_emb_to_fp16_dtype_0, x = audio_emb)[name = tensor<string, []>("cast_79")];
            tensor<fp16, [1, 1, 768]> input_3_cast_fp16 = add(x = audio_emb_to_fp16, y = op_502_cast_fp16_cast_uint16_cast_uint16)[name = tensor<string, []>("input_3_cast_fp16")];
            tensor<fp16, [600]> idx_range_promoted_to_fp16 = const()[name = tensor<string, []>("idx_range_promoted_to_fp16"), val = tensor<fp16, [600]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1579264)))];
            tensor<string, []> var_515_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_515_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1]> position_to_fp16 = cast(dtype = var_515_to_fp16_dtype_0, x = position)[name = tensor<string, []>("cast_78")];
            tensor<bool, [600]> var_516_cast_fp16 = less_equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor<string, []>("op_516_cast_fp16")];
            tensor<int32, [1]> sa_key_mask_axes_0 = const()[name = tensor<string, []>("sa_key_mask_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<string, []> sa_key_mask_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_key_mask_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [600]> var_516_cast_fp16_to_fp16 = cast(dtype = sa_key_mask_1_to_fp16_dtype_0, x = var_516_cast_fp16)[name = tensor<string, []>("cast_77")];
            tensor<fp16, [1, 600]> sa_key_mask_cast_fp16 = expand_dims(axes = sa_key_mask_axes_0, x = var_516_cast_fp16_to_fp16)[name = tensor<string, []>("sa_key_mask_cast_fp16")];
            tensor<int32, [1]> input_5_axes_0 = const()[name = tensor<string, []>("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1580544)))];
            tensor<fp16, []> var_525_to_fp16 = const()[name = tensor<string, []>("op_525_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, epsilon = var_525_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1582144))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3354048))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3358720)))];
            tensor<fp16, [1, 1, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_5_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
            tensor<int32, [5]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_539, x = linear_0_cast_fp16)[name = tensor<string, []>("qkv_3_cast_fp16")];
            tensor<int32, [5]> q_1_begin_0 = const()[name = tensor<string, []>("q_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_1_end_0 = const()[name = tensor<string, []>("q_1_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_1_end_mask_0 = const()[name = tensor<string, []>("q_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_1_squeeze_mask_0 = const()[name = tensor<string, []>("q_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("q_1_cast_fp16")];
            tensor<int32, [5]> new_k_1_begin_0 = const()[name = tensor<string, []>("new_k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_1_end_0 = const()[name = tensor<string, []>("new_k_1_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_1_end_mask_0 = const()[name = tensor<string, []>("new_k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_k_1_cast_fp16")];
            tensor<int32, [5]> new_v_1_begin_0 = const()[name = tensor<string, []>("new_v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_1_end_0 = const()[name = tensor<string, []>("new_v_1_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_1_end_mask_0 = const()[name = tensor<string, []>("new_v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_v_1_cast_fp16")];
            tensor<bool, [600]> var_585_cast_fp16 = equal(x = idx_range_promoted_to_fp16, y = position_to_fp16)[name = tensor<string, []>("op_585_cast_fp16")];
            tensor<int32, [4]> var_595 = const()[name = tensor<string, []>("op_595"), val = tensor<int32, [4]>([1, 600, 1, 1])];
            tensor<string, []> write_oh_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("write_oh_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [600]> var_585_cast_fp16_to_fp16 = cast(dtype = write_oh_1_to_fp16_dtype_0, x = var_585_cast_fp16)[name = tensor<string, []>("cast_76")];
            tensor<fp16, [1, 600, 1, 1]> write_oh_b_1_cast_fp16 = reshape(shape = var_595, x = var_585_cast_fp16_to_fp16)[name = tensor<string, []>("write_oh_b_1_cast_fp16")];
            tensor<fp16, []> var_597_to_fp16 = const()[name = tensor<string, []>("op_597_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
            tensor<fp16, [1, 600, 1, 1]> var_599_cast_fp16 = sub(x = var_597_to_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
            tensor<string, []> sa_k_in_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_0_to_fp16 = cast(dtype = sa_k_in_0_to_fp16_dtype_0, x = sa_k_in_0)[name = tensor<string, []>("cast_75")];
            tensor<fp16, [1, 600, 12, 64]> var_600_cast_fp16 = mul(x = sa_k_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_600_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_601_cast_fp16 = mul(x = new_k_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_601_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_1_cast_fp16 = add(x = var_600_cast_fp16, y = var_601_cast_fp16)[name = tensor<string, []>("sa_k_out_1_cast_fp16")];
            tensor<string, []> sa_k_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_0_to_fp16 = cast(dtype = sa_v_in_0_to_fp16_dtype_0, x = sa_v_in_0)[name = tensor<string, []>("cast_74")];
            tensor<fp16, [1, 600, 12, 64]> var_607_cast_fp16 = mul(x = sa_v_in_0_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_607_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_608_cast_fp16 = mul(x = new_v_1_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_608_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_1_cast_fp16 = add(x = var_607_cast_fp16, y = var_608_cast_fp16)[name = tensor<string, []>("sa_v_out_1_cast_fp16")];
            tensor<string, []> sa_v_out_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_627 = const()[name = tensor<string, []>("op_627"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_629_transpose_x_0 = const()[name = tensor<string, []>("op_629_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_629_transpose_y_0 = const()[name = tensor<string, []>("op_629_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_96_perm_0 = const()[name = tensor<string, []>("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_97_perm_0 = const()[name = tensor<string, []>("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_97 = transpose(perm = transpose_97_perm_0, x = sa_k_out_1_cast_fp16)[name = tensor<string, []>("transpose_262")];
            tensor<fp16, [1, 12, 1, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = tensor<string, []>("transpose_263")];
            tensor<fp16, [1, 12, 1, 600]> var_629_cast_fp16 = matmul(transpose_x = var_629_transpose_x_0, transpose_y = var_629_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor<string, []>("op_629_cast_fp16")];
            tensor<fp16, []> var_630_to_fp16 = const()[name = tensor<string, []>("op_630_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_1_cast_fp16 = mul(x = var_629_cast_fp16, y = var_630_to_fp16)[name = tensor<string, []>("scores_1_cast_fp16")];
            tensor<int32, [1]> var_638_axes_0 = const()[name = tensor<string, []>("op_638_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [1, 1, 600]> var_638_cast_fp16 = expand_dims(axes = var_638_axes_0, x = sa_key_mask_cast_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
            tensor<int32, [1]> var_640_axes_0 = const()[name = tensor<string, []>("op_640_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 1, 1, 600]> var_640_cast_fp16 = expand_dims(axes = var_640_axes_0, x = var_638_cast_fp16)[name = tensor<string, []>("op_640_cast_fp16")];
            tensor<fp16, []> var_646_promoted_to_fp16 = const()[name = tensor<string, []>("op_646_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
            tensor<bool, [1, 1, 1, 600]> var_647_cast_fp16 = equal(x = var_640_cast_fp16, y = var_646_promoted_to_fp16)[name = tensor<string, []>("op_647_cast_fp16")];
            tensor<fp16, []> var_648_to_fp16 = const()[name = tensor<string, []>("op_648_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_3_cast_fp16 = select(a = var_648_to_fp16, b = scores_1_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_3_cast_fp16")];
            tensor<int32, []> var_650 = const()[name = tensor<string, []>("op_650"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_1_cast_fp16 = softmax(axis = var_650, x = scores_3_cast_fp16)[name = tensor<string, []>("probs_1_cast_fp16")];
            tensor<bool, []> var_653_transpose_x_0 = const()[name = tensor<string, []>("op_653_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_653_transpose_y_0 = const()[name = tensor<string, []>("op_653_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_1_cast_fp16 = transpose(perm = var_627, x = sa_v_out_1_cast_fp16)[name = tensor<string, []>("transpose_261")];
            tensor<fp16, [1, 12, 1, 64]> var_653_cast_fp16 = matmul(transpose_x = var_653_transpose_x_0, transpose_y = var_653_transpose_y_0, x = probs_1_cast_fp16, y = v_t_1_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
            tensor<int32, [4]> var_658 = const()[name = tensor<string, []>("op_658"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_663 = const()[name = tensor<string, []>("op_663"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_659_cast_fp16 = transpose(perm = var_658, x = var_653_cast_fp16)[name = tensor<string, []>("transpose_260")];
            tensor<fp16, [1, 1, 768]> input_7_cast_fp16 = reshape(shape = var_663, x = var_659_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3363392))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3954112))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3955712)))];
            tensor<fp16, [1, 1, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_9_cast_fp16 = add(x = input_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
            tensor<int32, [1]> input_11_axes_0 = const()[name = tensor<string, []>("input_11_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3957312)))];
            tensor<fp16, []> var_671_to_fp16 = const()[name = tensor<string, []>("op_671_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_11_cast_fp16 = layer_norm(axes = input_11_axes_0, epsilon = var_671_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_9_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3958912))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057472))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057792)))];
            tensor<fp16, [1, 1, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
            tensor<int32, [4]> var_684 = const()[name = tensor<string, []>("op_684"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_1_cast_fp16 = reshape(shape = var_684, x = linear_2_cast_fp16)[name = tensor<string, []>("xq_proj_1_cast_fp16")];
            tensor<int32, [4]> var_702 = const()[name = tensor<string, []>("op_702"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_704_transpose_x_0 = const()[name = tensor<string, []>("op_704_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_704_transpose_y_0 = const()[name = tensor<string, []>("op_704_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_0_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_0_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_98_perm_0 = const()[name = tensor<string, []>("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_99_perm_0 = const()[name = tensor<string, []>("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_0_to_fp16 = cast(dtype = xa_k_0_to_fp16_dtype_0, x = xa_k_0)[name = tensor<string, []>("cast_73")];
            tensor<fp16, [1, 1, 128, 256]> transpose_99 = transpose(perm = transpose_99_perm_0, x = xa_k_0_to_fp16)[name = tensor<string, []>("transpose_258")];
            tensor<fp16, [1, 1, 1, 128]> transpose_98 = transpose(perm = transpose_98_perm_0, x = xq_proj_1_cast_fp16)[name = tensor<string, []>("transpose_259")];
            tensor<fp16, [1, 1, 1, 256]> var_704_cast_fp16 = matmul(transpose_x = var_704_transpose_x_0, transpose_y = var_704_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor<string, []>("op_704_cast_fp16")];
            tensor<fp16, []> var_705_to_fp16 = const()[name = tensor<string, []>("op_705_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_1_cast_fp16 = mul(x = var_704_cast_fp16, y = var_705_to_fp16)[name = tensor<string, []>("xscores_1_cast_fp16")];
            tensor<int32, [1]> var_713_axes_0 = const()[name = tensor<string, []>("op_713_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<string, []> encoder_mask_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_mask_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 256]> encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = tensor<string, []>("cast_72")];
            tensor<fp16, [1, 1, 256]> var_713_cast_fp16 = expand_dims(axes = var_713_axes_0, x = encoder_mask_to_fp16)[name = tensor<string, []>("op_713_cast_fp16")];
            tensor<int32, [1]> var_715_axes_0 = const()[name = tensor<string, []>("op_715_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 1, 1, 256]> var_715_cast_fp16 = expand_dims(axes = var_715_axes_0, x = var_713_cast_fp16)[name = tensor<string, []>("op_715_cast_fp16")];
            tensor<fp16, []> var_721_promoted_to_fp16 = const()[name = tensor<string, []>("op_721_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
            tensor<bool, [1, 1, 1, 256]> var_722_cast_fp16 = equal(x = var_715_cast_fp16, y = var_721_promoted_to_fp16)[name = tensor<string, []>("op_722_cast_fp16")];
            tensor<fp16, []> var_723_to_fp16 = const()[name = tensor<string, []>("op_723_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_3_cast_fp16 = select(a = var_723_to_fp16, b = xscores_1_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_3_cast_fp16")];
            tensor<int32, []> var_725 = const()[name = tensor<string, []>("op_725"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_1_cast_fp16 = softmax(axis = var_725, x = xscores_3_cast_fp16)[name = tensor<string, []>("xprobs_1_cast_fp16")];
            tensor<bool, []> var_728_transpose_x_0 = const()[name = tensor<string, []>("op_728_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_728_transpose_y_0 = const()[name = tensor<string, []>("op_728_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_0_to_fp16 = cast(dtype = xa_v_0_to_fp16_dtype_0, x = xa_v_0)[name = tensor<string, []>("cast_71")];
            tensor<fp16, [1, 1, 256, 128]> xvT_1_cast_fp16 = transpose(perm = var_702, x = xa_v_0_to_fp16)[name = tensor<string, []>("transpose_257")];
            tensor<fp16, [1, 1, 1, 128]> var_728_cast_fp16 = matmul(transpose_x = var_728_transpose_x_0, transpose_y = var_728_transpose_y_0, x = xprobs_1_cast_fp16, y = xvT_1_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
            tensor<int32, [4]> var_733 = const()[name = tensor<string, []>("op_733"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_738 = const()[name = tensor<string, []>("op_738"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_734_cast_fp16 = transpose(perm = var_733, x = var_728_cast_fp16)[name = tensor<string, []>("transpose_256")];
            tensor<fp16, [1, 1, 128]> input_13_cast_fp16 = reshape(shape = var_738, x = var_734_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4058112))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4156480))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_13_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_15_cast_fp16 = add(x = input_9_cast_fp16, y = linear_3_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
            tensor<int32, [1]> x_1_axes_0 = const()[name = tensor<string, []>("x_1_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4158080)))];
            tensor<fp16, []> var_746_to_fp16 = const()[name = tensor<string, []>("op_746_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_1_cast_fp16 = layer_norm(axes = x_1_axes_0, epsilon = var_746_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("x_1_cast_fp16")];
            tensor<int32, [3]> var_762 = const()[name = tensor<string, []>("op_762"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_1_pad_type_0 = const()[name = tensor<string, []>("y_1_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_1_strides_0 = const()[name = tensor<string, []>("y_1_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_1_pad_0 = const()[name = tensor<string, []>("y_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_1_dilations_0 = const()[name = tensor<string, []>("y_1_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_1_groups_0 = const()[name = tensor<string, []>("y_1_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4159680))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6522176))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_3_cast_fp16 = transpose(perm = var_762, x = x_1_cast_fp16)[name = tensor<string, []>("transpose_255")];
            tensor<fp16, [1, 3072, 1]> y_1_cast_fp16 = conv(dilations = y_1_dilations_0, groups = y_1_groups_0, pad = y_1_pad_0, pad_type = y_1_pad_type_0, strides = y_1_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_3_cast_fp16)[name = tensor<string, []>("y_1_cast_fp16")];
            tensor<string, []> x_5_mode_0 = const()[name = tensor<string, []>("x_5_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_5_cast_fp16 = gelu(mode = x_5_mode_0, x = y_1_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")];
            tensor<string, []> y_3_pad_type_0 = const()[name = tensor<string, []>("y_3_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_3_strides_0 = const()[name = tensor<string, []>("y_3_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_3_pad_0 = const()[name = tensor<string, []>("y_3_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_3_dilations_0 = const()[name = tensor<string, []>("y_3_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_3_groups_0 = const()[name = tensor<string, []>("y_3_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6528384))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8887744))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = tensor<string, []>("y_3_cast_fp16")];
            tensor<int32, [3]> var_780 = const()[name = tensor<string, []>("op_780"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_781_cast_fp16 = transpose(perm = var_780, x = y_3_cast_fp16)[name = tensor<string, []>("transpose_254")];
            tensor<fp16, [1, 1, 768]> input_17_cast_fp16 = add(x = input_15_cast_fp16, y = var_781_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
            tensor<int32, [1]> input_19_axes_0 = const()[name = tensor<string, []>("input_19_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8889344)))];
            tensor<fp16, []> var_785_to_fp16 = const()[name = tensor<string, []>("op_785_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_19_cast_fp16 = layer_norm(axes = input_19_axes_0, epsilon = var_785_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = input_17_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8890944))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10660480))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
            tensor<int32, [5]> var_799 = const()[name = tensor<string, []>("op_799"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_799, x = linear_4_cast_fp16)[name = tensor<string, []>("qkv_7_cast_fp16")];
            tensor<int32, [5]> q_3_begin_0 = const()[name = tensor<string, []>("q_3_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_3_end_0 = const()[name = tensor<string, []>("q_3_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_3_end_mask_0 = const()[name = tensor<string, []>("q_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_3_squeeze_mask_0 = const()[name = tensor<string, []>("q_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_3_cast_fp16 = slice_by_index(begin = q_3_begin_0, end = q_3_end_0, end_mask = q_3_end_mask_0, squeeze_mask = q_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("q_3_cast_fp16")];
            tensor<int32, [5]> new_k_3_begin_0 = const()[name = tensor<string, []>("new_k_3_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_3_end_0 = const()[name = tensor<string, []>("new_k_3_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_3_end_mask_0 = const()[name = tensor<string, []>("new_k_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_k_3_cast_fp16")];
            tensor<int32, [5]> new_v_3_begin_0 = const()[name = tensor<string, []>("new_v_3_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_3_end_0 = const()[name = tensor<string, []>("new_v_3_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_3_end_mask_0 = const()[name = tensor<string, []>("new_v_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_v_3_cast_fp16")];
            tensor<string, []> sa_k_in_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_1_to_fp16 = cast(dtype = sa_k_in_1_to_fp16_dtype_0, x = sa_k_in_1)[name = tensor<string, []>("cast_70")];
            tensor<fp16, [1, 600, 12, 64]> var_860_cast_fp16 = mul(x = sa_k_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_860_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_861_cast_fp16 = mul(x = new_k_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_861_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_3_cast_fp16 = add(x = var_860_cast_fp16, y = var_861_cast_fp16)[name = tensor<string, []>("sa_k_out_3_cast_fp16")];
            tensor<string, []> sa_k_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_1_to_fp16 = cast(dtype = sa_v_in_1_to_fp16_dtype_0, x = sa_v_in_1)[name = tensor<string, []>("cast_69")];
            tensor<fp16, [1, 600, 12, 64]> var_867_cast_fp16 = mul(x = sa_v_in_1_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_867_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_868_cast_fp16 = mul(x = new_v_3_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_868_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_3_cast_fp16 = add(x = var_867_cast_fp16, y = var_868_cast_fp16)[name = tensor<string, []>("sa_v_out_3_cast_fp16")];
            tensor<string, []> sa_v_out_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_887 = const()[name = tensor<string, []>("op_887"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_889_transpose_x_0 = const()[name = tensor<string, []>("op_889_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_889_transpose_y_0 = const()[name = tensor<string, []>("op_889_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_100_perm_0 = const()[name = tensor<string, []>("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_101_perm_0 = const()[name = tensor<string, []>("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_101 = transpose(perm = transpose_101_perm_0, x = sa_k_out_3_cast_fp16)[name = tensor<string, []>("transpose_252")];
            tensor<fp16, [1, 12, 1, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_3_cast_fp16)[name = tensor<string, []>("transpose_253")];
            tensor<fp16, [1, 12, 1, 600]> var_889_cast_fp16 = matmul(transpose_x = var_889_transpose_x_0, transpose_y = var_889_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor<string, []>("op_889_cast_fp16")];
            tensor<fp16, []> var_890_to_fp16 = const()[name = tensor<string, []>("op_890_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_5_cast_fp16 = mul(x = var_889_cast_fp16, y = var_890_to_fp16)[name = tensor<string, []>("scores_5_cast_fp16")];
            tensor<fp16, []> var_908_to_fp16 = const()[name = tensor<string, []>("op_908_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_7_cast_fp16 = select(a = var_908_to_fp16, b = scores_5_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_7_cast_fp16")];
            tensor<int32, []> var_910 = const()[name = tensor<string, []>("op_910"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_3_cast_fp16 = softmax(axis = var_910, x = scores_7_cast_fp16)[name = tensor<string, []>("probs_3_cast_fp16")];
            tensor<bool, []> var_913_transpose_x_0 = const()[name = tensor<string, []>("op_913_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_913_transpose_y_0 = const()[name = tensor<string, []>("op_913_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_3_cast_fp16 = transpose(perm = var_887, x = sa_v_out_3_cast_fp16)[name = tensor<string, []>("transpose_251")];
            tensor<fp16, [1, 12, 1, 64]> var_913_cast_fp16 = matmul(transpose_x = var_913_transpose_x_0, transpose_y = var_913_transpose_y_0, x = probs_3_cast_fp16, y = v_t_3_cast_fp16)[name = tensor<string, []>("op_913_cast_fp16")];
            tensor<int32, [4]> var_918 = const()[name = tensor<string, []>("op_918"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_923 = const()[name = tensor<string, []>("op_923"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_919_cast_fp16 = transpose(perm = var_918, x = var_913_cast_fp16)[name = tensor<string, []>("transpose_250")];
            tensor<fp16, [1, 1, 768]> input_21_cast_fp16 = reshape(shape = var_923, x = var_919_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10665152))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11255040))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_5_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_23_cast_fp16 = add(x = input_17_cast_fp16, y = linear_5_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
            tensor<int32, [1]> input_25_axes_0 = const()[name = tensor<string, []>("input_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11256640)))];
            tensor<fp16, []> var_931_to_fp16 = const()[name = tensor<string, []>("op_931_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_25_cast_fp16 = layer_norm(axes = input_25_axes_0, epsilon = var_931_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11258240))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11356608))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_6_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
            tensor<int32, [4]> var_944 = const()[name = tensor<string, []>("op_944"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_3_cast_fp16 = reshape(shape = var_944, x = linear_6_cast_fp16)[name = tensor<string, []>("xq_proj_3_cast_fp16")];
            tensor<int32, [4]> var_962 = const()[name = tensor<string, []>("op_962"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_964_transpose_x_0 = const()[name = tensor<string, []>("op_964_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_964_transpose_y_0 = const()[name = tensor<string, []>("op_964_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_102_perm_0 = const()[name = tensor<string, []>("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_103_perm_0 = const()[name = tensor<string, []>("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_1_to_fp16 = cast(dtype = xa_k_1_to_fp16_dtype_0, x = xa_k_1)[name = tensor<string, []>("cast_68")];
            tensor<fp16, [1, 1, 128, 256]> transpose_103 = transpose(perm = transpose_103_perm_0, x = xa_k_1_to_fp16)[name = tensor<string, []>("transpose_248")];
            tensor<fp16, [1, 1, 1, 128]> transpose_102 = transpose(perm = transpose_102_perm_0, x = xq_proj_3_cast_fp16)[name = tensor<string, []>("transpose_249")];
            tensor<fp16, [1, 1, 1, 256]> var_964_cast_fp16 = matmul(transpose_x = var_964_transpose_x_0, transpose_y = var_964_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor<string, []>("op_964_cast_fp16")];
            tensor<fp16, []> var_965_to_fp16 = const()[name = tensor<string, []>("op_965_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_5_cast_fp16 = mul(x = var_964_cast_fp16, y = var_965_to_fp16)[name = tensor<string, []>("xscores_5_cast_fp16")];
            tensor<fp16, []> var_983_to_fp16 = const()[name = tensor<string, []>("op_983_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_7_cast_fp16 = select(a = var_983_to_fp16, b = xscores_5_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_7_cast_fp16")];
            tensor<int32, []> var_985 = const()[name = tensor<string, []>("op_985"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_3_cast_fp16 = softmax(axis = var_985, x = xscores_7_cast_fp16)[name = tensor<string, []>("xprobs_3_cast_fp16")];
            tensor<bool, []> var_988_transpose_x_0 = const()[name = tensor<string, []>("op_988_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_988_transpose_y_0 = const()[name = tensor<string, []>("op_988_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_1_to_fp16 = cast(dtype = xa_v_1_to_fp16_dtype_0, x = xa_v_1)[name = tensor<string, []>("cast_67")];
            tensor<fp16, [1, 1, 256, 128]> xvT_3_cast_fp16 = transpose(perm = var_962, x = xa_v_1_to_fp16)[name = tensor<string, []>("transpose_247")];
            tensor<fp16, [1, 1, 1, 128]> var_988_cast_fp16 = matmul(transpose_x = var_988_transpose_x_0, transpose_y = var_988_transpose_y_0, x = xprobs_3_cast_fp16, y = xvT_3_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
            tensor<int32, [4]> var_993 = const()[name = tensor<string, []>("op_993"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_998 = const()[name = tensor<string, []>("op_998"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_994_cast_fp16 = transpose(perm = var_993, x = var_988_cast_fp16)[name = tensor<string, []>("transpose_246")];
            tensor<fp16, [1, 1, 128]> input_27_cast_fp16 = reshape(shape = var_998, x = var_994_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11356928))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11455296))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_29_cast_fp16 = add(x = input_23_cast_fp16, y = linear_7_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
            tensor<int32, [1]> x_9_axes_0 = const()[name = tensor<string, []>("x_9_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11456896)))];
            tensor<fp16, []> var_1006_to_fp16 = const()[name = tensor<string, []>("op_1006_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_9_cast_fp16 = layer_norm(axes = x_9_axes_0, epsilon = var_1006_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
            tensor<int32, [3]> var_1022 = const()[name = tensor<string, []>("op_1022"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_5_pad_type_0 = const()[name = tensor<string, []>("y_5_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_5_strides_0 = const()[name = tensor<string, []>("y_5_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_5_pad_0 = const()[name = tensor<string, []>("y_5_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_5_dilations_0 = const()[name = tensor<string, []>("y_5_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_5_groups_0 = const()[name = tensor<string, []>("y_5_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(11458496))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13817856))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_11_cast_fp16 = transpose(perm = var_1022, x = x_9_cast_fp16)[name = tensor<string, []>("transpose_245")];
            tensor<fp16, [1, 3072, 1]> y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor<string, []>("y_5_cast_fp16")];
            tensor<string, []> x_13_mode_0 = const()[name = tensor<string, []>("x_13_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_5_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")];
            tensor<string, []> y_7_pad_type_0 = const()[name = tensor<string, []>("y_7_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_7_strides_0 = const()[name = tensor<string, []>("y_7_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_7_pad_0 = const()[name = tensor<string, []>("y_7_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_7_dilations_0 = const()[name = tensor<string, []>("y_7_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_7_groups_0 = const()[name = tensor<string, []>("y_7_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13824064))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16183424))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_7_cast_fp16 = conv(dilations = y_7_dilations_0, groups = y_7_groups_0, pad = y_7_pad_0, pad_type = y_7_pad_type_0, strides = y_7_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = tensor<string, []>("y_7_cast_fp16")];
            tensor<int32, [3]> var_1040 = const()[name = tensor<string, []>("op_1040"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1041_cast_fp16 = transpose(perm = var_1040, x = y_7_cast_fp16)[name = tensor<string, []>("transpose_244")];
            tensor<fp16, [1, 1, 768]> input_31_cast_fp16 = add(x = input_29_cast_fp16, y = var_1041_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16185024)))];
            tensor<fp16, []> var_1045_to_fp16 = const()[name = tensor<string, []>("op_1045_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_1045_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(16186624))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17956160))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_8_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
            tensor<int32, [5]> var_1059 = const()[name = tensor<string, []>("op_1059"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_11_cast_fp16 = reshape(shape = var_1059, x = linear_8_cast_fp16)[name = tensor<string, []>("qkv_11_cast_fp16")];
            tensor<int32, [5]> q_5_begin_0 = const()[name = tensor<string, []>("q_5_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_5_end_0 = const()[name = tensor<string, []>("q_5_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_5_end_mask_0 = const()[name = tensor<string, []>("q_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_5_squeeze_mask_0 = const()[name = tensor<string, []>("q_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_5_cast_fp16 = slice_by_index(begin = q_5_begin_0, end = q_5_end_0, end_mask = q_5_end_mask_0, squeeze_mask = q_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")];
            tensor<int32, [5]> new_k_5_begin_0 = const()[name = tensor<string, []>("new_k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_5_end_0 = const()[name = tensor<string, []>("new_k_5_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_5_end_mask_0 = const()[name = tensor<string, []>("new_k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_k_5_cast_fp16")];
            tensor<int32, [5]> new_v_5_begin_0 = const()[name = tensor<string, []>("new_v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_5_end_0 = const()[name = tensor<string, []>("new_v_5_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_5_end_mask_0 = const()[name = tensor<string, []>("new_v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_v_5_cast_fp16")];
            tensor<string, []> sa_k_in_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_2_to_fp16 = cast(dtype = sa_k_in_2_to_fp16_dtype_0, x = sa_k_in_2)[name = tensor<string, []>("cast_66")];
            tensor<fp16, [1, 600, 12, 64]> var_1120_cast_fp16 = mul(x = sa_k_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1120_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1121_cast_fp16 = mul(x = new_k_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1121_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_5_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1121_cast_fp16)[name = tensor<string, []>("sa_k_out_5_cast_fp16")];
            tensor<string, []> sa_k_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_2_to_fp16 = cast(dtype = sa_v_in_2_to_fp16_dtype_0, x = sa_v_in_2)[name = tensor<string, []>("cast_65")];
            tensor<fp16, [1, 600, 12, 64]> var_1127_cast_fp16 = mul(x = sa_v_in_2_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1127_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1128_cast_fp16 = mul(x = new_v_5_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1128_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_5_cast_fp16 = add(x = var_1127_cast_fp16, y = var_1128_cast_fp16)[name = tensor<string, []>("sa_v_out_5_cast_fp16")];
            tensor<string, []> sa_v_out_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1147 = const()[name = tensor<string, []>("op_1147"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1149_transpose_x_0 = const()[name = tensor<string, []>("op_1149_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1149_transpose_y_0 = const()[name = tensor<string, []>("op_1149_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_104_perm_0 = const()[name = tensor<string, []>("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_105_perm_0 = const()[name = tensor<string, []>("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_105 = transpose(perm = transpose_105_perm_0, x = sa_k_out_5_cast_fp16)[name = tensor<string, []>("transpose_242")];
            tensor<fp16, [1, 12, 1, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_5_cast_fp16)[name = tensor<string, []>("transpose_243")];
            tensor<fp16, [1, 12, 1, 600]> var_1149_cast_fp16 = matmul(transpose_x = var_1149_transpose_x_0, transpose_y = var_1149_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor<string, []>("op_1149_cast_fp16")];
            tensor<fp16, []> var_1150_to_fp16 = const()[name = tensor<string, []>("op_1150_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_9_cast_fp16 = mul(x = var_1149_cast_fp16, y = var_1150_to_fp16)[name = tensor<string, []>("scores_9_cast_fp16")];
            tensor<fp16, []> var_1168_to_fp16 = const()[name = tensor<string, []>("op_1168_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_11_cast_fp16 = select(a = var_1168_to_fp16, b = scores_9_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_11_cast_fp16")];
            tensor<int32, []> var_1170 = const()[name = tensor<string, []>("op_1170"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_5_cast_fp16 = softmax(axis = var_1170, x = scores_11_cast_fp16)[name = tensor<string, []>("probs_5_cast_fp16")];
            tensor<bool, []> var_1173_transpose_x_0 = const()[name = tensor<string, []>("op_1173_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1173_transpose_y_0 = const()[name = tensor<string, []>("op_1173_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_5_cast_fp16 = transpose(perm = var_1147, x = sa_v_out_5_cast_fp16)[name = tensor<string, []>("transpose_241")];
            tensor<fp16, [1, 12, 1, 64]> var_1173_cast_fp16 = matmul(transpose_x = var_1173_transpose_x_0, transpose_y = var_1173_transpose_y_0, x = probs_5_cast_fp16, y = v_t_5_cast_fp16)[name = tensor<string, []>("op_1173_cast_fp16")];
            tensor<int32, [4]> var_1178 = const()[name = tensor<string, []>("op_1178"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1183 = const()[name = tensor<string, []>("op_1183"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1179_cast_fp16 = transpose(perm = var_1178, x = var_1173_cast_fp16)[name = tensor<string, []>("transpose_240")];
            tensor<fp16, [1, 1, 768]> input_35_cast_fp16 = reshape(shape = var_1183, x = var_1179_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17960832))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18550720))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_37_cast_fp16 = add(x = input_31_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
            tensor<int32, [1]> input_39_axes_0 = const()[name = tensor<string, []>("input_39_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18552320)))];
            tensor<fp16, []> var_1191_to_fp16 = const()[name = tensor<string, []>("op_1191_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_39_cast_fp16 = layer_norm(axes = input_39_axes_0, epsilon = var_1191_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18553920))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18652288))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_10_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
            tensor<int32, [4]> var_1204 = const()[name = tensor<string, []>("op_1204"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_5_cast_fp16 = reshape(shape = var_1204, x = linear_10_cast_fp16)[name = tensor<string, []>("xq_proj_5_cast_fp16")];
            tensor<int32, [4]> var_1222 = const()[name = tensor<string, []>("op_1222"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1224_transpose_x_0 = const()[name = tensor<string, []>("op_1224_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1224_transpose_y_0 = const()[name = tensor<string, []>("op_1224_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_2_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_2_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_106_perm_0 = const()[name = tensor<string, []>("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_107_perm_0 = const()[name = tensor<string, []>("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_2_to_fp16 = cast(dtype = xa_k_2_to_fp16_dtype_0, x = xa_k_2)[name = tensor<string, []>("cast_64")];
            tensor<fp16, [1, 1, 128, 256]> transpose_107 = transpose(perm = transpose_107_perm_0, x = xa_k_2_to_fp16)[name = tensor<string, []>("transpose_238")];
            tensor<fp16, [1, 1, 1, 128]> transpose_106 = transpose(perm = transpose_106_perm_0, x = xq_proj_5_cast_fp16)[name = tensor<string, []>("transpose_239")];
            tensor<fp16, [1, 1, 1, 256]> var_1224_cast_fp16 = matmul(transpose_x = var_1224_transpose_x_0, transpose_y = var_1224_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor<string, []>("op_1224_cast_fp16")];
            tensor<fp16, []> var_1225_to_fp16 = const()[name = tensor<string, []>("op_1225_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_9_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor<string, []>("xscores_9_cast_fp16")];
            tensor<fp16, []> var_1243_to_fp16 = const()[name = tensor<string, []>("op_1243_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_11_cast_fp16 = select(a = var_1243_to_fp16, b = xscores_9_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_11_cast_fp16")];
            tensor<int32, []> var_1245 = const()[name = tensor<string, []>("op_1245"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_5_cast_fp16 = softmax(axis = var_1245, x = xscores_11_cast_fp16)[name = tensor<string, []>("xprobs_5_cast_fp16")];
            tensor<bool, []> var_1248_transpose_x_0 = const()[name = tensor<string, []>("op_1248_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1248_transpose_y_0 = const()[name = tensor<string, []>("op_1248_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_2_to_fp16 = cast(dtype = xa_v_2_to_fp16_dtype_0, x = xa_v_2)[name = tensor<string, []>("cast_63")];
            tensor<fp16, [1, 1, 256, 128]> xvT_5_cast_fp16 = transpose(perm = var_1222, x = xa_v_2_to_fp16)[name = tensor<string, []>("transpose_237")];
            tensor<fp16, [1, 1, 1, 128]> var_1248_cast_fp16 = matmul(transpose_x = var_1248_transpose_x_0, transpose_y = var_1248_transpose_y_0, x = xprobs_5_cast_fp16, y = xvT_5_cast_fp16)[name = tensor<string, []>("op_1248_cast_fp16")];
            tensor<int32, [4]> var_1253 = const()[name = tensor<string, []>("op_1253"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1258 = const()[name = tensor<string, []>("op_1258"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1254_cast_fp16 = transpose(perm = var_1253, x = var_1248_cast_fp16)[name = tensor<string, []>("transpose_236")];
            tensor<fp16, [1, 1, 128]> input_41_cast_fp16 = reshape(shape = var_1258, x = var_1254_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18652608))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18750976))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_43_cast_fp16 = add(x = input_37_cast_fp16, y = linear_11_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = tensor<string, []>("x_17_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18752576)))];
            tensor<fp16, []> var_1266_to_fp16 = const()[name = tensor<string, []>("op_1266_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, epsilon = var_1266_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_43_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")];
            tensor<int32, [3]> var_1282 = const()[name = tensor<string, []>("op_1282"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_9_pad_type_0 = const()[name = tensor<string, []>("y_9_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_9_strides_0 = const()[name = tensor<string, []>("y_9_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_9_pad_0 = const()[name = tensor<string, []>("y_9_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_9_dilations_0 = const()[name = tensor<string, []>("y_9_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_9_groups_0 = const()[name = tensor<string, []>("y_9_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18754176))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21113536))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_19_cast_fp16 = transpose(perm = var_1282, x = x_17_cast_fp16)[name = tensor<string, []>("transpose_235")];
            tensor<fp16, [1, 3072, 1]> y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_19_cast_fp16)[name = tensor<string, []>("y_9_cast_fp16")];
            tensor<string, []> x_21_mode_0 = const()[name = tensor<string, []>("x_21_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_21_cast_fp16 = gelu(mode = x_21_mode_0, x = y_9_cast_fp16)[name = tensor<string, []>("x_21_cast_fp16")];
            tensor<string, []> y_11_pad_type_0 = const()[name = tensor<string, []>("y_11_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_11_strides_0 = const()[name = tensor<string, []>("y_11_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_11_pad_0 = const()[name = tensor<string, []>("y_11_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_11_dilations_0 = const()[name = tensor<string, []>("y_11_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_11_groups_0 = const()[name = tensor<string, []>("y_11_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(21119744))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23479104))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = tensor<string, []>("y_11_cast_fp16")];
            tensor<int32, [3]> var_1300 = const()[name = tensor<string, []>("op_1300"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1301_cast_fp16 = transpose(perm = var_1300, x = y_11_cast_fp16)[name = tensor<string, []>("transpose_234")];
            tensor<fp16, [1, 1, 768]> input_45_cast_fp16 = add(x = input_43_cast_fp16, y = var_1301_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
            tensor<int32, [1]> input_47_axes_0 = const()[name = tensor<string, []>("input_47_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23480704)))];
            tensor<fp16, []> var_1305_to_fp16 = const()[name = tensor<string, []>("op_1305_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_47_cast_fp16 = layer_norm(axes = input_47_axes_0, epsilon = var_1305_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = input_45_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23482304))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25251840))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_12_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
            tensor<int32, [5]> var_1319 = const()[name = tensor<string, []>("op_1319"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_15_cast_fp16 = reshape(shape = var_1319, x = linear_12_cast_fp16)[name = tensor<string, []>("qkv_15_cast_fp16")];
            tensor<int32, [5]> q_7_begin_0 = const()[name = tensor<string, []>("q_7_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_7_end_0 = const()[name = tensor<string, []>("q_7_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_7_end_mask_0 = const()[name = tensor<string, []>("q_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_7_squeeze_mask_0 = const()[name = tensor<string, []>("q_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_7_cast_fp16 = slice_by_index(begin = q_7_begin_0, end = q_7_end_0, end_mask = q_7_end_mask_0, squeeze_mask = q_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("q_7_cast_fp16")];
            tensor<int32, [5]> new_k_7_begin_0 = const()[name = tensor<string, []>("new_k_7_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_7_end_0 = const()[name = tensor<string, []>("new_k_7_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_7_end_mask_0 = const()[name = tensor<string, []>("new_k_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_k_7_cast_fp16")];
            tensor<int32, [5]> new_v_7_begin_0 = const()[name = tensor<string, []>("new_v_7_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_7_end_0 = const()[name = tensor<string, []>("new_v_7_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_7_end_mask_0 = const()[name = tensor<string, []>("new_v_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_v_7_cast_fp16")];
            tensor<string, []> sa_k_in_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_3_to_fp16 = cast(dtype = sa_k_in_3_to_fp16_dtype_0, x = sa_k_in_3)[name = tensor<string, []>("cast_62")];
            tensor<fp16, [1, 600, 12, 64]> var_1380_cast_fp16 = mul(x = sa_k_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1380_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1381_cast_fp16 = mul(x = new_k_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1381_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_7_cast_fp16 = add(x = var_1380_cast_fp16, y = var_1381_cast_fp16)[name = tensor<string, []>("sa_k_out_7_cast_fp16")];
            tensor<string, []> sa_k_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_3_to_fp16 = cast(dtype = sa_v_in_3_to_fp16_dtype_0, x = sa_v_in_3)[name = tensor<string, []>("cast_61")];
            tensor<fp16, [1, 600, 12, 64]> var_1387_cast_fp16 = mul(x = sa_v_in_3_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1387_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1388_cast_fp16 = mul(x = new_v_7_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1388_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_7_cast_fp16 = add(x = var_1387_cast_fp16, y = var_1388_cast_fp16)[name = tensor<string, []>("sa_v_out_7_cast_fp16")];
            tensor<string, []> sa_v_out_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1407 = const()[name = tensor<string, []>("op_1407"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1409_transpose_x_0 = const()[name = tensor<string, []>("op_1409_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1409_transpose_y_0 = const()[name = tensor<string, []>("op_1409_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_108_perm_0 = const()[name = tensor<string, []>("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_109_perm_0 = const()[name = tensor<string, []>("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_109 = transpose(perm = transpose_109_perm_0, x = sa_k_out_7_cast_fp16)[name = tensor<string, []>("transpose_232")];
            tensor<fp16, [1, 12, 1, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_7_cast_fp16)[name = tensor<string, []>("transpose_233")];
            tensor<fp16, [1, 12, 1, 600]> var_1409_cast_fp16 = matmul(transpose_x = var_1409_transpose_x_0, transpose_y = var_1409_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor<string, []>("op_1409_cast_fp16")];
            tensor<fp16, []> var_1410_to_fp16 = const()[name = tensor<string, []>("op_1410_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_13_cast_fp16 = mul(x = var_1409_cast_fp16, y = var_1410_to_fp16)[name = tensor<string, []>("scores_13_cast_fp16")];
            tensor<fp16, []> var_1428_to_fp16 = const()[name = tensor<string, []>("op_1428_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_15_cast_fp16 = select(a = var_1428_to_fp16, b = scores_13_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_15_cast_fp16")];
            tensor<int32, []> var_1430 = const()[name = tensor<string, []>("op_1430"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_7_cast_fp16 = softmax(axis = var_1430, x = scores_15_cast_fp16)[name = tensor<string, []>("probs_7_cast_fp16")];
            tensor<bool, []> var_1433_transpose_x_0 = const()[name = tensor<string, []>("op_1433_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1433_transpose_y_0 = const()[name = tensor<string, []>("op_1433_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_7_cast_fp16 = transpose(perm = var_1407, x = sa_v_out_7_cast_fp16)[name = tensor<string, []>("transpose_231")];
            tensor<fp16, [1, 12, 1, 64]> var_1433_cast_fp16 = matmul(transpose_x = var_1433_transpose_x_0, transpose_y = var_1433_transpose_y_0, x = probs_7_cast_fp16, y = v_t_7_cast_fp16)[name = tensor<string, []>("op_1433_cast_fp16")];
            tensor<int32, [4]> var_1438 = const()[name = tensor<string, []>("op_1438"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1443 = const()[name = tensor<string, []>("op_1443"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1439_cast_fp16 = transpose(perm = var_1438, x = var_1433_cast_fp16)[name = tensor<string, []>("transpose_230")];
            tensor<fp16, [1, 1, 768]> input_49_cast_fp16 = reshape(shape = var_1443, x = var_1439_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25256512))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25846400))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_51_cast_fp16 = add(x = input_45_cast_fp16, y = linear_13_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
            tensor<int32, [1]> input_53_axes_0 = const()[name = tensor<string, []>("input_53_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25848000)))];
            tensor<fp16, []> var_1451_to_fp16 = const()[name = tensor<string, []>("op_1451_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_53_cast_fp16 = layer_norm(axes = input_53_axes_0, epsilon = var_1451_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25849600))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25947968))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
            tensor<int32, [4]> var_1464 = const()[name = tensor<string, []>("op_1464"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_7_cast_fp16 = reshape(shape = var_1464, x = linear_14_cast_fp16)[name = tensor<string, []>("xq_proj_7_cast_fp16")];
            tensor<int32, [4]> var_1482 = const()[name = tensor<string, []>("op_1482"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1484_transpose_x_0 = const()[name = tensor<string, []>("op_1484_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1484_transpose_y_0 = const()[name = tensor<string, []>("op_1484_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_3_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_3_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_110_perm_0 = const()[name = tensor<string, []>("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_111_perm_0 = const()[name = tensor<string, []>("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_3_to_fp16 = cast(dtype = xa_k_3_to_fp16_dtype_0, x = xa_k_3)[name = tensor<string, []>("cast_60")];
            tensor<fp16, [1, 1, 128, 256]> transpose_111 = transpose(perm = transpose_111_perm_0, x = xa_k_3_to_fp16)[name = tensor<string, []>("transpose_228")];
            tensor<fp16, [1, 1, 1, 128]> transpose_110 = transpose(perm = transpose_110_perm_0, x = xq_proj_7_cast_fp16)[name = tensor<string, []>("transpose_229")];
            tensor<fp16, [1, 1, 1, 256]> var_1484_cast_fp16 = matmul(transpose_x = var_1484_transpose_x_0, transpose_y = var_1484_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor<string, []>("op_1484_cast_fp16")];
            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_13_cast_fp16 = mul(x = var_1484_cast_fp16, y = var_1485_to_fp16)[name = tensor<string, []>("xscores_13_cast_fp16")];
            tensor<fp16, []> var_1503_to_fp16 = const()[name = tensor<string, []>("op_1503_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_15_cast_fp16 = select(a = var_1503_to_fp16, b = xscores_13_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_15_cast_fp16")];
            tensor<int32, []> var_1505 = const()[name = tensor<string, []>("op_1505"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_7_cast_fp16 = softmax(axis = var_1505, x = xscores_15_cast_fp16)[name = tensor<string, []>("xprobs_7_cast_fp16")];
            tensor<bool, []> var_1508_transpose_x_0 = const()[name = tensor<string, []>("op_1508_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1508_transpose_y_0 = const()[name = tensor<string, []>("op_1508_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_3_to_fp16 = cast(dtype = xa_v_3_to_fp16_dtype_0, x = xa_v_3)[name = tensor<string, []>("cast_59")];
            tensor<fp16, [1, 1, 256, 128]> xvT_7_cast_fp16 = transpose(perm = var_1482, x = xa_v_3_to_fp16)[name = tensor<string, []>("transpose_227")];
            tensor<fp16, [1, 1, 1, 128]> var_1508_cast_fp16 = matmul(transpose_x = var_1508_transpose_x_0, transpose_y = var_1508_transpose_y_0, x = xprobs_7_cast_fp16, y = xvT_7_cast_fp16)[name = tensor<string, []>("op_1508_cast_fp16")];
            tensor<int32, [4]> var_1513 = const()[name = tensor<string, []>("op_1513"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1518 = const()[name = tensor<string, []>("op_1518"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1514_cast_fp16 = transpose(perm = var_1513, x = var_1508_cast_fp16)[name = tensor<string, []>("transpose_226")];
            tensor<fp16, [1, 1, 128]> input_55_cast_fp16 = reshape(shape = var_1518, x = var_1514_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25948288))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26046656))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_57_cast_fp16 = add(x = input_51_cast_fp16, y = linear_15_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
            tensor<int32, [1]> x_25_axes_0 = const()[name = tensor<string, []>("x_25_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26048256)))];
            tensor<fp16, []> var_1526_to_fp16 = const()[name = tensor<string, []>("op_1526_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_25_cast_fp16 = layer_norm(axes = x_25_axes_0, epsilon = var_1526_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_57_cast_fp16)[name = tensor<string, []>("x_25_cast_fp16")];
            tensor<int32, [3]> var_1542 = const()[name = tensor<string, []>("op_1542"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_13_pad_type_0 = const()[name = tensor<string, []>("y_13_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_13_strides_0 = const()[name = tensor<string, []>("y_13_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_13_pad_0 = const()[name = tensor<string, []>("y_13_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_13_dilations_0 = const()[name = tensor<string, []>("y_13_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_13_groups_0 = const()[name = tensor<string, []>("y_13_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(26049856))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28409216))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_27_cast_fp16 = transpose(perm = var_1542, x = x_25_cast_fp16)[name = tensor<string, []>("transpose_225")];
            tensor<fp16, [1, 3072, 1]> y_13_cast_fp16 = conv(dilations = y_13_dilations_0, groups = y_13_groups_0, pad = y_13_pad_0, pad_type = y_13_pad_type_0, strides = y_13_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = tensor<string, []>("y_13_cast_fp16")];
            tensor<string, []> x_29_mode_0 = const()[name = tensor<string, []>("x_29_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_13_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")];
            tensor<string, []> y_15_pad_type_0 = const()[name = tensor<string, []>("y_15_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_15_strides_0 = const()[name = tensor<string, []>("y_15_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_15_pad_0 = const()[name = tensor<string, []>("y_15_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_15_dilations_0 = const()[name = tensor<string, []>("y_15_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_15_groups_0 = const()[name = tensor<string, []>("y_15_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28415424))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30774784))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = tensor<string, []>("y_15_cast_fp16")];
            tensor<int32, [3]> var_1560 = const()[name = tensor<string, []>("op_1560"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1561_cast_fp16 = transpose(perm = var_1560, x = y_15_cast_fp16)[name = tensor<string, []>("transpose_224")];
            tensor<fp16, [1, 1, 768]> input_59_cast_fp16 = add(x = input_57_cast_fp16, y = var_1561_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
            tensor<int32, [1]> input_61_axes_0 = const()[name = tensor<string, []>("input_61_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30776384)))];
            tensor<fp16, []> var_1565_to_fp16 = const()[name = tensor<string, []>("op_1565_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_61_cast_fp16 = layer_norm(axes = input_61_axes_0, epsilon = var_1565_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30777984))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32547520))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_16_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_61_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
            tensor<int32, [5]> var_1579 = const()[name = tensor<string, []>("op_1579"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_19_cast_fp16 = reshape(shape = var_1579, x = linear_16_cast_fp16)[name = tensor<string, []>("qkv_19_cast_fp16")];
            tensor<int32, [5]> q_9_begin_0 = const()[name = tensor<string, []>("q_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_9_end_0 = const()[name = tensor<string, []>("q_9_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_9_end_mask_0 = const()[name = tensor<string, []>("q_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_9_squeeze_mask_0 = const()[name = tensor<string, []>("q_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")];
            tensor<int32, [5]> new_k_9_begin_0 = const()[name = tensor<string, []>("new_k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_9_end_0 = const()[name = tensor<string, []>("new_k_9_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_9_end_mask_0 = const()[name = tensor<string, []>("new_k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_k_9_cast_fp16")];
            tensor<int32, [5]> new_v_9_begin_0 = const()[name = tensor<string, []>("new_v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_9_end_0 = const()[name = tensor<string, []>("new_v_9_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_9_end_mask_0 = const()[name = tensor<string, []>("new_v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_v_9_cast_fp16")];
            tensor<string, []> sa_k_in_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_4_to_fp16 = cast(dtype = sa_k_in_4_to_fp16_dtype_0, x = sa_k_in_4)[name = tensor<string, []>("cast_58")];
            tensor<fp16, [1, 600, 12, 64]> var_1640_cast_fp16 = mul(x = sa_k_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1640_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1641_cast_fp16 = mul(x = new_k_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1641_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_9_cast_fp16 = add(x = var_1640_cast_fp16, y = var_1641_cast_fp16)[name = tensor<string, []>("sa_k_out_9_cast_fp16")];
            tensor<string, []> sa_k_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_4_to_fp16 = cast(dtype = sa_v_in_4_to_fp16_dtype_0, x = sa_v_in_4)[name = tensor<string, []>("cast_57")];
            tensor<fp16, [1, 600, 12, 64]> var_1647_cast_fp16 = mul(x = sa_v_in_4_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1647_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1648_cast_fp16 = mul(x = new_v_9_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1648_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_9_cast_fp16 = add(x = var_1647_cast_fp16, y = var_1648_cast_fp16)[name = tensor<string, []>("sa_v_out_9_cast_fp16")];
            tensor<string, []> sa_v_out_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1667 = const()[name = tensor<string, []>("op_1667"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1669_transpose_x_0 = const()[name = tensor<string, []>("op_1669_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1669_transpose_y_0 = const()[name = tensor<string, []>("op_1669_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_112_perm_0 = const()[name = tensor<string, []>("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_113_perm_0 = const()[name = tensor<string, []>("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_113 = transpose(perm = transpose_113_perm_0, x = sa_k_out_9_cast_fp16)[name = tensor<string, []>("transpose_222")];
            tensor<fp16, [1, 12, 1, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_9_cast_fp16)[name = tensor<string, []>("transpose_223")];
            tensor<fp16, [1, 12, 1, 600]> var_1669_cast_fp16 = matmul(transpose_x = var_1669_transpose_x_0, transpose_y = var_1669_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor<string, []>("op_1669_cast_fp16")];
            tensor<fp16, []> var_1670_to_fp16 = const()[name = tensor<string, []>("op_1670_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_17_cast_fp16 = mul(x = var_1669_cast_fp16, y = var_1670_to_fp16)[name = tensor<string, []>("scores_17_cast_fp16")];
            tensor<fp16, []> var_1688_to_fp16 = const()[name = tensor<string, []>("op_1688_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_19_cast_fp16 = select(a = var_1688_to_fp16, b = scores_17_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_19_cast_fp16")];
            tensor<int32, []> var_1690 = const()[name = tensor<string, []>("op_1690"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_9_cast_fp16 = softmax(axis = var_1690, x = scores_19_cast_fp16)[name = tensor<string, []>("probs_9_cast_fp16")];
            tensor<bool, []> var_1693_transpose_x_0 = const()[name = tensor<string, []>("op_1693_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1693_transpose_y_0 = const()[name = tensor<string, []>("op_1693_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_9_cast_fp16 = transpose(perm = var_1667, x = sa_v_out_9_cast_fp16)[name = tensor<string, []>("transpose_221")];
            tensor<fp16, [1, 12, 1, 64]> var_1693_cast_fp16 = matmul(transpose_x = var_1693_transpose_x_0, transpose_y = var_1693_transpose_y_0, x = probs_9_cast_fp16, y = v_t_9_cast_fp16)[name = tensor<string, []>("op_1693_cast_fp16")];
            tensor<int32, [4]> var_1698 = const()[name = tensor<string, []>("op_1698"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1703 = const()[name = tensor<string, []>("op_1703"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1699_cast_fp16 = transpose(perm = var_1698, x = var_1693_cast_fp16)[name = tensor<string, []>("transpose_220")];
            tensor<fp16, [1, 1, 768]> input_63_cast_fp16 = reshape(shape = var_1703, x = var_1699_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32552192))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33142080))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_65_cast_fp16 = add(x = input_59_cast_fp16, y = linear_17_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
            tensor<int32, [1]> input_67_axes_0 = const()[name = tensor<string, []>("input_67_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33143680)))];
            tensor<fp16, []> var_1711_to_fp16 = const()[name = tensor<string, []>("op_1711_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_67_cast_fp16 = layer_norm(axes = input_67_axes_0, epsilon = var_1711_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33145280))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33243648))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
            tensor<int32, [4]> var_1724 = const()[name = tensor<string, []>("op_1724"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_9_cast_fp16 = reshape(shape = var_1724, x = linear_18_cast_fp16)[name = tensor<string, []>("xq_proj_9_cast_fp16")];
            tensor<int32, [4]> var_1742 = const()[name = tensor<string, []>("op_1742"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_1744_transpose_x_0 = const()[name = tensor<string, []>("op_1744_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1744_transpose_y_0 = const()[name = tensor<string, []>("op_1744_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_4_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_4_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_114_perm_0 = const()[name = tensor<string, []>("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_115_perm_0 = const()[name = tensor<string, []>("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_4_to_fp16 = cast(dtype = xa_k_4_to_fp16_dtype_0, x = xa_k_4)[name = tensor<string, []>("cast_56")];
            tensor<fp16, [1, 1, 128, 256]> transpose_115 = transpose(perm = transpose_115_perm_0, x = xa_k_4_to_fp16)[name = tensor<string, []>("transpose_218")];
            tensor<fp16, [1, 1, 1, 128]> transpose_114 = transpose(perm = transpose_114_perm_0, x = xq_proj_9_cast_fp16)[name = tensor<string, []>("transpose_219")];
            tensor<fp16, [1, 1, 1, 256]> var_1744_cast_fp16 = matmul(transpose_x = var_1744_transpose_x_0, transpose_y = var_1744_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor<string, []>("op_1744_cast_fp16")];
            tensor<fp16, []> var_1745_to_fp16 = const()[name = tensor<string, []>("op_1745_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_17_cast_fp16 = mul(x = var_1744_cast_fp16, y = var_1745_to_fp16)[name = tensor<string, []>("xscores_17_cast_fp16")];
            tensor<fp16, []> var_1763_to_fp16 = const()[name = tensor<string, []>("op_1763_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_19_cast_fp16 = select(a = var_1763_to_fp16, b = xscores_17_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_19_cast_fp16")];
            tensor<int32, []> var_1765 = const()[name = tensor<string, []>("op_1765"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_9_cast_fp16 = softmax(axis = var_1765, x = xscores_19_cast_fp16)[name = tensor<string, []>("xprobs_9_cast_fp16")];
            tensor<bool, []> var_1768_transpose_x_0 = const()[name = tensor<string, []>("op_1768_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1768_transpose_y_0 = const()[name = tensor<string, []>("op_1768_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_4_to_fp16 = cast(dtype = xa_v_4_to_fp16_dtype_0, x = xa_v_4)[name = tensor<string, []>("cast_55")];
            tensor<fp16, [1, 1, 256, 128]> xvT_9_cast_fp16 = transpose(perm = var_1742, x = xa_v_4_to_fp16)[name = tensor<string, []>("transpose_217")];
            tensor<fp16, [1, 1, 1, 128]> var_1768_cast_fp16 = matmul(transpose_x = var_1768_transpose_x_0, transpose_y = var_1768_transpose_y_0, x = xprobs_9_cast_fp16, y = xvT_9_cast_fp16)[name = tensor<string, []>("op_1768_cast_fp16")];
            tensor<int32, [4]> var_1773 = const()[name = tensor<string, []>("op_1773"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1778 = const()[name = tensor<string, []>("op_1778"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_1774_cast_fp16 = transpose(perm = var_1773, x = var_1768_cast_fp16)[name = tensor<string, []>("transpose_216")];
            tensor<fp16, [1, 1, 128]> input_69_cast_fp16 = reshape(shape = var_1778, x = var_1774_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33243968))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33342336))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_71_cast_fp16 = add(x = input_65_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
            tensor<int32, [1]> x_33_axes_0 = const()[name = tensor<string, []>("x_33_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33343936)))];
            tensor<fp16, []> var_1786_to_fp16 = const()[name = tensor<string, []>("op_1786_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_33_cast_fp16 = layer_norm(axes = x_33_axes_0, epsilon = var_1786_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_71_cast_fp16)[name = tensor<string, []>("x_33_cast_fp16")];
            tensor<int32, [3]> var_1802 = const()[name = tensor<string, []>("op_1802"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_17_pad_type_0 = const()[name = tensor<string, []>("y_17_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_17_strides_0 = const()[name = tensor<string, []>("y_17_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_17_pad_0 = const()[name = tensor<string, []>("y_17_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_17_dilations_0 = const()[name = tensor<string, []>("y_17_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_17_groups_0 = const()[name = tensor<string, []>("y_17_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33345536))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35704896))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_35_cast_fp16 = transpose(perm = var_1802, x = x_33_cast_fp16)[name = tensor<string, []>("transpose_215")];
            tensor<fp16, [1, 3072, 1]> y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_35_cast_fp16)[name = tensor<string, []>("y_17_cast_fp16")];
            tensor<string, []> x_37_mode_0 = const()[name = tensor<string, []>("x_37_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_37_cast_fp16 = gelu(mode = x_37_mode_0, x = y_17_cast_fp16)[name = tensor<string, []>("x_37_cast_fp16")];
            tensor<string, []> y_19_pad_type_0 = const()[name = tensor<string, []>("y_19_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_19_strides_0 = const()[name = tensor<string, []>("y_19_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_19_pad_0 = const()[name = tensor<string, []>("y_19_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_19_dilations_0 = const()[name = tensor<string, []>("y_19_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_19_groups_0 = const()[name = tensor<string, []>("y_19_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35711104))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38070464))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_19_cast_fp16 = conv(dilations = y_19_dilations_0, groups = y_19_groups_0, pad = y_19_pad_0, pad_type = y_19_pad_type_0, strides = y_19_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = tensor<string, []>("y_19_cast_fp16")];
            tensor<int32, [3]> var_1820 = const()[name = tensor<string, []>("op_1820"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_1821_cast_fp16 = transpose(perm = var_1820, x = y_19_cast_fp16)[name = tensor<string, []>("transpose_214")];
            tensor<fp16, [1, 1, 768]> input_73_cast_fp16 = add(x = input_71_cast_fp16, y = var_1821_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
            tensor<int32, [1]> input_75_axes_0 = const()[name = tensor<string, []>("input_75_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38072064)))];
            tensor<fp16, []> var_1825_to_fp16 = const()[name = tensor<string, []>("op_1825_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_75_cast_fp16 = layer_norm(axes = input_75_axes_0, epsilon = var_1825_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38073664))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39843200))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
            tensor<int32, [5]> var_1839 = const()[name = tensor<string, []>("op_1839"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_23_cast_fp16 = reshape(shape = var_1839, x = linear_20_cast_fp16)[name = tensor<string, []>("qkv_23_cast_fp16")];
            tensor<int32, [5]> q_11_begin_0 = const()[name = tensor<string, []>("q_11_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_11_end_0 = const()[name = tensor<string, []>("q_11_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_11_end_mask_0 = const()[name = tensor<string, []>("q_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_11_squeeze_mask_0 = const()[name = tensor<string, []>("q_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_11_cast_fp16 = slice_by_index(begin = q_11_begin_0, end = q_11_end_0, end_mask = q_11_end_mask_0, squeeze_mask = q_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("q_11_cast_fp16")];
            tensor<int32, [5]> new_k_11_begin_0 = const()[name = tensor<string, []>("new_k_11_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_11_end_0 = const()[name = tensor<string, []>("new_k_11_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_11_end_mask_0 = const()[name = tensor<string, []>("new_k_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_k_11_cast_fp16")];
            tensor<int32, [5]> new_v_11_begin_0 = const()[name = tensor<string, []>("new_v_11_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_11_end_0 = const()[name = tensor<string, []>("new_v_11_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_11_end_mask_0 = const()[name = tensor<string, []>("new_v_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_v_11_cast_fp16")];
            tensor<string, []> sa_k_in_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_5_to_fp16 = cast(dtype = sa_k_in_5_to_fp16_dtype_0, x = sa_k_in_5)[name = tensor<string, []>("cast_54")];
            tensor<fp16, [1, 600, 12, 64]> var_1900_cast_fp16 = mul(x = sa_k_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1900_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1901_cast_fp16 = mul(x = new_k_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1901_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_11_cast_fp16 = add(x = var_1900_cast_fp16, y = var_1901_cast_fp16)[name = tensor<string, []>("sa_k_out_11_cast_fp16")];
            tensor<string, []> sa_k_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_5_to_fp16 = cast(dtype = sa_v_in_5_to_fp16_dtype_0, x = sa_v_in_5)[name = tensor<string, []>("cast_53")];
            tensor<fp16, [1, 600, 12, 64]> var_1907_cast_fp16 = mul(x = sa_v_in_5_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_1907_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_1908_cast_fp16 = mul(x = new_v_11_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_1908_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_11_cast_fp16 = add(x = var_1907_cast_fp16, y = var_1908_cast_fp16)[name = tensor<string, []>("sa_v_out_11_cast_fp16")];
            tensor<string, []> sa_v_out_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_1927 = const()[name = tensor<string, []>("op_1927"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_1929_transpose_x_0 = const()[name = tensor<string, []>("op_1929_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1929_transpose_y_0 = const()[name = tensor<string, []>("op_1929_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_116_perm_0 = const()[name = tensor<string, []>("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_117_perm_0 = const()[name = tensor<string, []>("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_117 = transpose(perm = transpose_117_perm_0, x = sa_k_out_11_cast_fp16)[name = tensor<string, []>("transpose_212")];
            tensor<fp16, [1, 12, 1, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_11_cast_fp16)[name = tensor<string, []>("transpose_213")];
            tensor<fp16, [1, 12, 1, 600]> var_1929_cast_fp16 = matmul(transpose_x = var_1929_transpose_x_0, transpose_y = var_1929_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor<string, []>("op_1929_cast_fp16")];
            tensor<fp16, []> var_1930_to_fp16 = const()[name = tensor<string, []>("op_1930_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_21_cast_fp16 = mul(x = var_1929_cast_fp16, y = var_1930_to_fp16)[name = tensor<string, []>("scores_21_cast_fp16")];
            tensor<fp16, []> var_1948_to_fp16 = const()[name = tensor<string, []>("op_1948_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_23_cast_fp16 = select(a = var_1948_to_fp16, b = scores_21_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_23_cast_fp16")];
            tensor<int32, []> var_1950 = const()[name = tensor<string, []>("op_1950"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_11_cast_fp16 = softmax(axis = var_1950, x = scores_23_cast_fp16)[name = tensor<string, []>("probs_11_cast_fp16")];
            tensor<bool, []> var_1953_transpose_x_0 = const()[name = tensor<string, []>("op_1953_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_1953_transpose_y_0 = const()[name = tensor<string, []>("op_1953_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_11_cast_fp16 = transpose(perm = var_1927, x = sa_v_out_11_cast_fp16)[name = tensor<string, []>("transpose_211")];
            tensor<fp16, [1, 12, 1, 64]> var_1953_cast_fp16 = matmul(transpose_x = var_1953_transpose_x_0, transpose_y = var_1953_transpose_y_0, x = probs_11_cast_fp16, y = v_t_11_cast_fp16)[name = tensor<string, []>("op_1953_cast_fp16")];
            tensor<int32, [4]> var_1958 = const()[name = tensor<string, []>("op_1958"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1963 = const()[name = tensor<string, []>("op_1963"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_1959_cast_fp16 = transpose(perm = var_1958, x = var_1953_cast_fp16)[name = tensor<string, []>("transpose_210")];
            tensor<fp16, [1, 1, 768]> input_77_cast_fp16 = reshape(shape = var_1963, x = var_1959_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39847872))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40437760))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_79_cast_fp16 = add(x = input_73_cast_fp16, y = linear_21_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
            tensor<int32, [1]> input_81_axes_0 = const()[name = tensor<string, []>("input_81_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40439360)))];
            tensor<fp16, []> var_1971_to_fp16 = const()[name = tensor<string, []>("op_1971_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_1971_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40440960))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40539328))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
            tensor<int32, [4]> var_1984 = const()[name = tensor<string, []>("op_1984"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_11_cast_fp16 = reshape(shape = var_1984, x = linear_22_cast_fp16)[name = tensor<string, []>("xq_proj_11_cast_fp16")];
            tensor<int32, [4]> var_2002 = const()[name = tensor<string, []>("op_2002"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2004_transpose_x_0 = const()[name = tensor<string, []>("op_2004_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2004_transpose_y_0 = const()[name = tensor<string, []>("op_2004_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_5_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_5_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_118_perm_0 = const()[name = tensor<string, []>("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_119_perm_0 = const()[name = tensor<string, []>("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_5_to_fp16 = cast(dtype = xa_k_5_to_fp16_dtype_0, x = xa_k_5)[name = tensor<string, []>("cast_52")];
            tensor<fp16, [1, 1, 128, 256]> transpose_119 = transpose(perm = transpose_119_perm_0, x = xa_k_5_to_fp16)[name = tensor<string, []>("transpose_208")];
            tensor<fp16, [1, 1, 1, 128]> transpose_118 = transpose(perm = transpose_118_perm_0, x = xq_proj_11_cast_fp16)[name = tensor<string, []>("transpose_209")];
            tensor<fp16, [1, 1, 1, 256]> var_2004_cast_fp16 = matmul(transpose_x = var_2004_transpose_x_0, transpose_y = var_2004_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor<string, []>("op_2004_cast_fp16")];
            tensor<fp16, []> var_2005_to_fp16 = const()[name = tensor<string, []>("op_2005_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_21_cast_fp16 = mul(x = var_2004_cast_fp16, y = var_2005_to_fp16)[name = tensor<string, []>("xscores_21_cast_fp16")];
            tensor<fp16, []> var_2023_to_fp16 = const()[name = tensor<string, []>("op_2023_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_23_cast_fp16 = select(a = var_2023_to_fp16, b = xscores_21_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_23_cast_fp16")];
            tensor<int32, []> var_2025 = const()[name = tensor<string, []>("op_2025"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_11_cast_fp16 = softmax(axis = var_2025, x = xscores_23_cast_fp16)[name = tensor<string, []>("xprobs_11_cast_fp16")];
            tensor<bool, []> var_2028_transpose_x_0 = const()[name = tensor<string, []>("op_2028_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2028_transpose_y_0 = const()[name = tensor<string, []>("op_2028_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_5_to_fp16 = cast(dtype = xa_v_5_to_fp16_dtype_0, x = xa_v_5)[name = tensor<string, []>("cast_51")];
            tensor<fp16, [1, 1, 256, 128]> xvT_11_cast_fp16 = transpose(perm = var_2002, x = xa_v_5_to_fp16)[name = tensor<string, []>("transpose_207")];
            tensor<fp16, [1, 1, 1, 128]> var_2028_cast_fp16 = matmul(transpose_x = var_2028_transpose_x_0, transpose_y = var_2028_transpose_y_0, x = xprobs_11_cast_fp16, y = xvT_11_cast_fp16)[name = tensor<string, []>("op_2028_cast_fp16")];
            tensor<int32, [4]> var_2033 = const()[name = tensor<string, []>("op_2033"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2038 = const()[name = tensor<string, []>("op_2038"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2034_cast_fp16 = transpose(perm = var_2033, x = var_2028_cast_fp16)[name = tensor<string, []>("transpose_206")];
            tensor<fp16, [1, 1, 128]> input_83_cast_fp16 = reshape(shape = var_2038, x = var_2034_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40539648))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40638016))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_85_cast_fp16 = add(x = input_79_cast_fp16, y = linear_23_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
            tensor<int32, [1]> x_41_axes_0 = const()[name = tensor<string, []>("x_41_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40639616)))];
            tensor<fp16, []> var_2046_to_fp16 = const()[name = tensor<string, []>("op_2046_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_41_cast_fp16 = layer_norm(axes = x_41_axes_0, epsilon = var_2046_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("x_41_cast_fp16")];
            tensor<int32, [3]> var_2062 = const()[name = tensor<string, []>("op_2062"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_21_pad_type_0 = const()[name = tensor<string, []>("y_21_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_21_strides_0 = const()[name = tensor<string, []>("y_21_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_21_pad_0 = const()[name = tensor<string, []>("y_21_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_21_dilations_0 = const()[name = tensor<string, []>("y_21_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_21_groups_0 = const()[name = tensor<string, []>("y_21_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40641216))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43000576))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_43_cast_fp16 = transpose(perm = var_2062, x = x_41_cast_fp16)[name = tensor<string, []>("transpose_205")];
            tensor<fp16, [1, 3072, 1]> y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = tensor<string, []>("y_21_cast_fp16")];
            tensor<string, []> x_45_mode_0 = const()[name = tensor<string, []>("x_45_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_21_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")];
            tensor<string, []> y_23_pad_type_0 = const()[name = tensor<string, []>("y_23_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_23_strides_0 = const()[name = tensor<string, []>("y_23_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_23_pad_0 = const()[name = tensor<string, []>("y_23_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_23_dilations_0 = const()[name = tensor<string, []>("y_23_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_23_groups_0 = const()[name = tensor<string, []>("y_23_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43006784))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45366144))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = tensor<string, []>("y_23_cast_fp16")];
            tensor<int32, [3]> var_2080 = const()[name = tensor<string, []>("op_2080"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2081_cast_fp16 = transpose(perm = var_2080, x = y_23_cast_fp16)[name = tensor<string, []>("transpose_204")];
            tensor<fp16, [1, 1, 768]> input_87_cast_fp16 = add(x = input_85_cast_fp16, y = var_2081_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
            tensor<int32, [1]> input_89_axes_0 = const()[name = tensor<string, []>("input_89_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45367744)))];
            tensor<fp16, []> var_2085_to_fp16 = const()[name = tensor<string, []>("op_2085_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_89_cast_fp16 = layer_norm(axes = input_89_axes_0, epsilon = var_2085_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45369344))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47138880))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
            tensor<int32, [5]> var_2099 = const()[name = tensor<string, []>("op_2099"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_27_cast_fp16 = reshape(shape = var_2099, x = linear_24_cast_fp16)[name = tensor<string, []>("qkv_27_cast_fp16")];
            tensor<int32, [5]> q_13_begin_0 = const()[name = tensor<string, []>("q_13_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_13_end_0 = const()[name = tensor<string, []>("q_13_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_13_end_mask_0 = const()[name = tensor<string, []>("q_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_13_squeeze_mask_0 = const()[name = tensor<string, []>("q_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_13_cast_fp16 = slice_by_index(begin = q_13_begin_0, end = q_13_end_0, end_mask = q_13_end_mask_0, squeeze_mask = q_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("q_13_cast_fp16")];
            tensor<int32, [5]> new_k_13_begin_0 = const()[name = tensor<string, []>("new_k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_13_end_0 = const()[name = tensor<string, []>("new_k_13_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_13_end_mask_0 = const()[name = tensor<string, []>("new_k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_k_13_cast_fp16")];
            tensor<int32, [5]> new_v_13_begin_0 = const()[name = tensor<string, []>("new_v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_13_end_0 = const()[name = tensor<string, []>("new_v_13_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_13_end_mask_0 = const()[name = tensor<string, []>("new_v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_v_13_cast_fp16")];
            tensor<string, []> sa_k_in_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_6_to_fp16 = cast(dtype = sa_k_in_6_to_fp16_dtype_0, x = sa_k_in_6)[name = tensor<string, []>("cast_50")];
            tensor<fp16, [1, 600, 12, 64]> var_2160_cast_fp16 = mul(x = sa_k_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2160_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2161_cast_fp16 = mul(x = new_k_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2161_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_13_cast_fp16 = add(x = var_2160_cast_fp16, y = var_2161_cast_fp16)[name = tensor<string, []>("sa_k_out_13_cast_fp16")];
            tensor<string, []> sa_k_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_6_to_fp16 = cast(dtype = sa_v_in_6_to_fp16_dtype_0, x = sa_v_in_6)[name = tensor<string, []>("cast_49")];
            tensor<fp16, [1, 600, 12, 64]> var_2167_cast_fp16 = mul(x = sa_v_in_6_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2167_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2168_cast_fp16 = mul(x = new_v_13_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2168_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_13_cast_fp16 = add(x = var_2167_cast_fp16, y = var_2168_cast_fp16)[name = tensor<string, []>("sa_v_out_13_cast_fp16")];
            tensor<string, []> sa_v_out_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2187 = const()[name = tensor<string, []>("op_2187"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2189_transpose_x_0 = const()[name = tensor<string, []>("op_2189_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2189_transpose_y_0 = const()[name = tensor<string, []>("op_2189_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_120_perm_0 = const()[name = tensor<string, []>("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_121_perm_0 = const()[name = tensor<string, []>("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_121 = transpose(perm = transpose_121_perm_0, x = sa_k_out_13_cast_fp16)[name = tensor<string, []>("transpose_202")];
            tensor<fp16, [1, 12, 1, 64]> transpose_120 = transpose(perm = transpose_120_perm_0, x = q_13_cast_fp16)[name = tensor<string, []>("transpose_203")];
            tensor<fp16, [1, 12, 1, 600]> var_2189_cast_fp16 = matmul(transpose_x = var_2189_transpose_x_0, transpose_y = var_2189_transpose_y_0, x = transpose_120, y = transpose_121)[name = tensor<string, []>("op_2189_cast_fp16")];
            tensor<fp16, []> var_2190_to_fp16 = const()[name = tensor<string, []>("op_2190_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_25_cast_fp16 = mul(x = var_2189_cast_fp16, y = var_2190_to_fp16)[name = tensor<string, []>("scores_25_cast_fp16")];
            tensor<fp16, []> var_2208_to_fp16 = const()[name = tensor<string, []>("op_2208_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_27_cast_fp16 = select(a = var_2208_to_fp16, b = scores_25_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_27_cast_fp16")];
            tensor<int32, []> var_2210 = const()[name = tensor<string, []>("op_2210"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_13_cast_fp16 = softmax(axis = var_2210, x = scores_27_cast_fp16)[name = tensor<string, []>("probs_13_cast_fp16")];
            tensor<bool, []> var_2213_transpose_x_0 = const()[name = tensor<string, []>("op_2213_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2213_transpose_y_0 = const()[name = tensor<string, []>("op_2213_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_13_cast_fp16 = transpose(perm = var_2187, x = sa_v_out_13_cast_fp16)[name = tensor<string, []>("transpose_201")];
            tensor<fp16, [1, 12, 1, 64]> var_2213_cast_fp16 = matmul(transpose_x = var_2213_transpose_x_0, transpose_y = var_2213_transpose_y_0, x = probs_13_cast_fp16, y = v_t_13_cast_fp16)[name = tensor<string, []>("op_2213_cast_fp16")];
            tensor<int32, [4]> var_2218 = const()[name = tensor<string, []>("op_2218"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2223 = const()[name = tensor<string, []>("op_2223"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2219_cast_fp16 = transpose(perm = var_2218, x = var_2213_cast_fp16)[name = tensor<string, []>("transpose_200")];
            tensor<fp16, [1, 1, 768]> input_91_cast_fp16 = reshape(shape = var_2223, x = var_2219_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47143552))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47733440))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_93_cast_fp16 = add(x = input_87_cast_fp16, y = linear_25_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")];
            tensor<int32, [1]> input_95_axes_0 = const()[name = tensor<string, []>("input_95_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47735040)))];
            tensor<fp16, []> var_2231_to_fp16 = const()[name = tensor<string, []>("op_2231_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_95_cast_fp16 = layer_norm(axes = input_95_axes_0, epsilon = var_2231_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_93_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47736640))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47835008))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_26_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
            tensor<int32, [4]> var_2244 = const()[name = tensor<string, []>("op_2244"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_13_cast_fp16 = reshape(shape = var_2244, x = linear_26_cast_fp16)[name = tensor<string, []>("xq_proj_13_cast_fp16")];
            tensor<int32, [4]> var_2262 = const()[name = tensor<string, []>("op_2262"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2264_transpose_x_0 = const()[name = tensor<string, []>("op_2264_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2264_transpose_y_0 = const()[name = tensor<string, []>("op_2264_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_6_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_6_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_122_perm_0 = const()[name = tensor<string, []>("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_123_perm_0 = const()[name = tensor<string, []>("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_6_to_fp16 = cast(dtype = xa_k_6_to_fp16_dtype_0, x = xa_k_6)[name = tensor<string, []>("cast_48")];
            tensor<fp16, [1, 1, 128, 256]> transpose_123 = transpose(perm = transpose_123_perm_0, x = xa_k_6_to_fp16)[name = tensor<string, []>("transpose_198")];
            tensor<fp16, [1, 1, 1, 128]> transpose_122 = transpose(perm = transpose_122_perm_0, x = xq_proj_13_cast_fp16)[name = tensor<string, []>("transpose_199")];
            tensor<fp16, [1, 1, 1, 256]> var_2264_cast_fp16 = matmul(transpose_x = var_2264_transpose_x_0, transpose_y = var_2264_transpose_y_0, x = transpose_122, y = transpose_123)[name = tensor<string, []>("op_2264_cast_fp16")];
            tensor<fp16, []> var_2265_to_fp16 = const()[name = tensor<string, []>("op_2265_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_25_cast_fp16 = mul(x = var_2264_cast_fp16, y = var_2265_to_fp16)[name = tensor<string, []>("xscores_25_cast_fp16")];
            tensor<fp16, []> var_2283_to_fp16 = const()[name = tensor<string, []>("op_2283_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_27_cast_fp16 = select(a = var_2283_to_fp16, b = xscores_25_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_27_cast_fp16")];
            tensor<int32, []> var_2285 = const()[name = tensor<string, []>("op_2285"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_13_cast_fp16 = softmax(axis = var_2285, x = xscores_27_cast_fp16)[name = tensor<string, []>("xprobs_13_cast_fp16")];
            tensor<bool, []> var_2288_transpose_x_0 = const()[name = tensor<string, []>("op_2288_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2288_transpose_y_0 = const()[name = tensor<string, []>("op_2288_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_6_to_fp16 = cast(dtype = xa_v_6_to_fp16_dtype_0, x = xa_v_6)[name = tensor<string, []>("cast_47")];
            tensor<fp16, [1, 1, 256, 128]> xvT_13_cast_fp16 = transpose(perm = var_2262, x = xa_v_6_to_fp16)[name = tensor<string, []>("transpose_197")];
            tensor<fp16, [1, 1, 1, 128]> var_2288_cast_fp16 = matmul(transpose_x = var_2288_transpose_x_0, transpose_y = var_2288_transpose_y_0, x = xprobs_13_cast_fp16, y = xvT_13_cast_fp16)[name = tensor<string, []>("op_2288_cast_fp16")];
            tensor<int32, [4]> var_2293 = const()[name = tensor<string, []>("op_2293"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2298 = const()[name = tensor<string, []>("op_2298"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2294_cast_fp16 = transpose(perm = var_2293, x = var_2288_cast_fp16)[name = tensor<string, []>("transpose_196")];
            tensor<fp16, [1, 1, 128]> input_97_cast_fp16 = reshape(shape = var_2298, x = var_2294_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47835328))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47933696))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_99_cast_fp16 = add(x = input_93_cast_fp16, y = linear_27_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
            tensor<int32, [1]> x_49_axes_0 = const()[name = tensor<string, []>("x_49_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47935296)))];
            tensor<fp16, []> var_2306_to_fp16 = const()[name = tensor<string, []>("op_2306_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_49_cast_fp16 = layer_norm(axes = x_49_axes_0, epsilon = var_2306_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_99_cast_fp16)[name = tensor<string, []>("x_49_cast_fp16")];
            tensor<int32, [3]> var_2322 = const()[name = tensor<string, []>("op_2322"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_25_pad_type_0 = const()[name = tensor<string, []>("y_25_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_25_strides_0 = const()[name = tensor<string, []>("y_25_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_25_pad_0 = const()[name = tensor<string, []>("y_25_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_25_dilations_0 = const()[name = tensor<string, []>("y_25_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_25_groups_0 = const()[name = tensor<string, []>("y_25_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47936896))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50296256))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_51_cast_fp16 = transpose(perm = var_2322, x = x_49_cast_fp16)[name = tensor<string, []>("transpose_195")];
            tensor<fp16, [1, 3072, 1]> y_25_cast_fp16 = conv(dilations = y_25_dilations_0, groups = y_25_groups_0, pad = y_25_pad_0, pad_type = y_25_pad_type_0, strides = y_25_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_51_cast_fp16)[name = tensor<string, []>("y_25_cast_fp16")];
            tensor<string, []> x_53_mode_0 = const()[name = tensor<string, []>("x_53_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = y_25_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")];
            tensor<string, []> y_27_pad_type_0 = const()[name = tensor<string, []>("y_27_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_27_strides_0 = const()[name = tensor<string, []>("y_27_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_27_pad_0 = const()[name = tensor<string, []>("y_27_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_27_dilations_0 = const()[name = tensor<string, []>("y_27_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_27_groups_0 = const()[name = tensor<string, []>("y_27_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50302464))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52661824))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor<string, []>("y_27_cast_fp16")];
            tensor<int32, [3]> var_2340 = const()[name = tensor<string, []>("op_2340"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2341_cast_fp16 = transpose(perm = var_2340, x = y_27_cast_fp16)[name = tensor<string, []>("transpose_194")];
            tensor<fp16, [1, 1, 768]> input_101_cast_fp16 = add(x = input_99_cast_fp16, y = var_2341_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
            tensor<int32, [1]> input_103_axes_0 = const()[name = tensor<string, []>("input_103_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52663424)))];
            tensor<fp16, []> var_2345_to_fp16 = const()[name = tensor<string, []>("op_2345_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_103_cast_fp16 = layer_norm(axes = input_103_axes_0, epsilon = var_2345_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(52665024))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54434560))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
            tensor<int32, [5]> var_2359 = const()[name = tensor<string, []>("op_2359"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_31_cast_fp16 = reshape(shape = var_2359, x = linear_28_cast_fp16)[name = tensor<string, []>("qkv_31_cast_fp16")];
            tensor<int32, [5]> q_15_begin_0 = const()[name = tensor<string, []>("q_15_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_15_end_0 = const()[name = tensor<string, []>("q_15_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_15_end_mask_0 = const()[name = tensor<string, []>("q_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_15_squeeze_mask_0 = const()[name = tensor<string, []>("q_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_15_cast_fp16 = slice_by_index(begin = q_15_begin_0, end = q_15_end_0, end_mask = q_15_end_mask_0, squeeze_mask = q_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("q_15_cast_fp16")];
            tensor<int32, [5]> new_k_15_begin_0 = const()[name = tensor<string, []>("new_k_15_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_15_end_0 = const()[name = tensor<string, []>("new_k_15_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_15_end_mask_0 = const()[name = tensor<string, []>("new_k_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_k_15_cast_fp16")];
            tensor<int32, [5]> new_v_15_begin_0 = const()[name = tensor<string, []>("new_v_15_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_15_end_0 = const()[name = tensor<string, []>("new_v_15_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_15_end_mask_0 = const()[name = tensor<string, []>("new_v_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_v_15_cast_fp16")];
            tensor<string, []> sa_k_in_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_7_to_fp16 = cast(dtype = sa_k_in_7_to_fp16_dtype_0, x = sa_k_in_7)[name = tensor<string, []>("cast_46")];
            tensor<fp16, [1, 600, 12, 64]> var_2420_cast_fp16 = mul(x = sa_k_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2420_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2421_cast_fp16 = mul(x = new_k_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2421_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_15_cast_fp16 = add(x = var_2420_cast_fp16, y = var_2421_cast_fp16)[name = tensor<string, []>("sa_k_out_15_cast_fp16")];
            tensor<string, []> sa_k_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_7_to_fp16 = cast(dtype = sa_v_in_7_to_fp16_dtype_0, x = sa_v_in_7)[name = tensor<string, []>("cast_45")];
            tensor<fp16, [1, 600, 12, 64]> var_2427_cast_fp16 = mul(x = sa_v_in_7_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2427_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2428_cast_fp16 = mul(x = new_v_15_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2428_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_15_cast_fp16 = add(x = var_2427_cast_fp16, y = var_2428_cast_fp16)[name = tensor<string, []>("sa_v_out_15_cast_fp16")];
            tensor<string, []> sa_v_out_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2447 = const()[name = tensor<string, []>("op_2447"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2449_transpose_x_0 = const()[name = tensor<string, []>("op_2449_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2449_transpose_y_0 = const()[name = tensor<string, []>("op_2449_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_124_perm_0 = const()[name = tensor<string, []>("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_125_perm_0 = const()[name = tensor<string, []>("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_125 = transpose(perm = transpose_125_perm_0, x = sa_k_out_15_cast_fp16)[name = tensor<string, []>("transpose_192")];
            tensor<fp16, [1, 12, 1, 64]> transpose_124 = transpose(perm = transpose_124_perm_0, x = q_15_cast_fp16)[name = tensor<string, []>("transpose_193")];
            tensor<fp16, [1, 12, 1, 600]> var_2449_cast_fp16 = matmul(transpose_x = var_2449_transpose_x_0, transpose_y = var_2449_transpose_y_0, x = transpose_124, y = transpose_125)[name = tensor<string, []>("op_2449_cast_fp16")];
            tensor<fp16, []> var_2450_to_fp16 = const()[name = tensor<string, []>("op_2450_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_29_cast_fp16 = mul(x = var_2449_cast_fp16, y = var_2450_to_fp16)[name = tensor<string, []>("scores_29_cast_fp16")];
            tensor<fp16, []> var_2468_to_fp16 = const()[name = tensor<string, []>("op_2468_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_31_cast_fp16 = select(a = var_2468_to_fp16, b = scores_29_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_31_cast_fp16")];
            tensor<int32, []> var_2470 = const()[name = tensor<string, []>("op_2470"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_15_cast_fp16 = softmax(axis = var_2470, x = scores_31_cast_fp16)[name = tensor<string, []>("probs_15_cast_fp16")];
            tensor<bool, []> var_2473_transpose_x_0 = const()[name = tensor<string, []>("op_2473_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2473_transpose_y_0 = const()[name = tensor<string, []>("op_2473_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_15_cast_fp16 = transpose(perm = var_2447, x = sa_v_out_15_cast_fp16)[name = tensor<string, []>("transpose_191")];
            tensor<fp16, [1, 12, 1, 64]> var_2473_cast_fp16 = matmul(transpose_x = var_2473_transpose_x_0, transpose_y = var_2473_transpose_y_0, x = probs_15_cast_fp16, y = v_t_15_cast_fp16)[name = tensor<string, []>("op_2473_cast_fp16")];
            tensor<int32, [4]> var_2478 = const()[name = tensor<string, []>("op_2478"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2483 = const()[name = tensor<string, []>("op_2483"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2479_cast_fp16 = transpose(perm = var_2478, x = var_2473_cast_fp16)[name = tensor<string, []>("transpose_190")];
            tensor<fp16, [1, 1, 768]> input_105_cast_fp16 = reshape(shape = var_2483, x = var_2479_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54439232))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55029120))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_107_cast_fp16 = add(x = input_101_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
            tensor<int32, [1]> input_109_axes_0 = const()[name = tensor<string, []>("input_109_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55030720)))];
            tensor<fp16, []> var_2491_to_fp16 = const()[name = tensor<string, []>("op_2491_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, epsilon = var_2491_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_107_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55032320))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55130688))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = input_109_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
            tensor<int32, [4]> var_2504 = const()[name = tensor<string, []>("op_2504"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_15_cast_fp16 = reshape(shape = var_2504, x = linear_30_cast_fp16)[name = tensor<string, []>("xq_proj_15_cast_fp16")];
            tensor<int32, [4]> var_2522 = const()[name = tensor<string, []>("op_2522"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2524_transpose_x_0 = const()[name = tensor<string, []>("op_2524_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2524_transpose_y_0 = const()[name = tensor<string, []>("op_2524_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_7_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_7_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_126_perm_0 = const()[name = tensor<string, []>("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_127_perm_0 = const()[name = tensor<string, []>("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_7_to_fp16 = cast(dtype = xa_k_7_to_fp16_dtype_0, x = xa_k_7)[name = tensor<string, []>("cast_44")];
            tensor<fp16, [1, 1, 128, 256]> transpose_127 = transpose(perm = transpose_127_perm_0, x = xa_k_7_to_fp16)[name = tensor<string, []>("transpose_188")];
            tensor<fp16, [1, 1, 1, 128]> transpose_126 = transpose(perm = transpose_126_perm_0, x = xq_proj_15_cast_fp16)[name = tensor<string, []>("transpose_189")];
            tensor<fp16, [1, 1, 1, 256]> var_2524_cast_fp16 = matmul(transpose_x = var_2524_transpose_x_0, transpose_y = var_2524_transpose_y_0, x = transpose_126, y = transpose_127)[name = tensor<string, []>("op_2524_cast_fp16")];
            tensor<fp16, []> var_2525_to_fp16 = const()[name = tensor<string, []>("op_2525_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_29_cast_fp16 = mul(x = var_2524_cast_fp16, y = var_2525_to_fp16)[name = tensor<string, []>("xscores_29_cast_fp16")];
            tensor<fp16, []> var_2543_to_fp16 = const()[name = tensor<string, []>("op_2543_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_31_cast_fp16 = select(a = var_2543_to_fp16, b = xscores_29_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_31_cast_fp16")];
            tensor<int32, []> var_2545 = const()[name = tensor<string, []>("op_2545"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_15_cast_fp16 = softmax(axis = var_2545, x = xscores_31_cast_fp16)[name = tensor<string, []>("xprobs_15_cast_fp16")];
            tensor<bool, []> var_2548_transpose_x_0 = const()[name = tensor<string, []>("op_2548_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2548_transpose_y_0 = const()[name = tensor<string, []>("op_2548_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_7_to_fp16 = cast(dtype = xa_v_7_to_fp16_dtype_0, x = xa_v_7)[name = tensor<string, []>("cast_43")];
            tensor<fp16, [1, 1, 256, 128]> xvT_15_cast_fp16 = transpose(perm = var_2522, x = xa_v_7_to_fp16)[name = tensor<string, []>("transpose_187")];
            tensor<fp16, [1, 1, 1, 128]> var_2548_cast_fp16 = matmul(transpose_x = var_2548_transpose_x_0, transpose_y = var_2548_transpose_y_0, x = xprobs_15_cast_fp16, y = xvT_15_cast_fp16)[name = tensor<string, []>("op_2548_cast_fp16")];
            tensor<int32, [4]> var_2553 = const()[name = tensor<string, []>("op_2553"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2558 = const()[name = tensor<string, []>("op_2558"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2554_cast_fp16 = transpose(perm = var_2553, x = var_2548_cast_fp16)[name = tensor<string, []>("transpose_186")];
            tensor<fp16, [1, 1, 128]> input_111_cast_fp16 = reshape(shape = var_2558, x = var_2554_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55131008))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55229376))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_113_cast_fp16 = add(x = input_107_cast_fp16, y = linear_31_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
            tensor<int32, [1]> x_57_axes_0 = const()[name = tensor<string, []>("x_57_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55230976)))];
            tensor<fp16, []> var_2566_to_fp16 = const()[name = tensor<string, []>("op_2566_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_57_cast_fp16 = layer_norm(axes = x_57_axes_0, epsilon = var_2566_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("x_57_cast_fp16")];
            tensor<int32, [3]> var_2582 = const()[name = tensor<string, []>("op_2582"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_29_pad_type_0 = const()[name = tensor<string, []>("y_29_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_29_strides_0 = const()[name = tensor<string, []>("y_29_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_29_pad_0 = const()[name = tensor<string, []>("y_29_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_29_dilations_0 = const()[name = tensor<string, []>("y_29_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_29_groups_0 = const()[name = tensor<string, []>("y_29_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55232576))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57591936))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_59_cast_fp16 = transpose(perm = var_2582, x = x_57_cast_fp16)[name = tensor<string, []>("transpose_185")];
            tensor<fp16, [1, 3072, 1]> y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = tensor<string, []>("y_29_cast_fp16")];
            tensor<string, []> x_61_mode_0 = const()[name = tensor<string, []>("x_61_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_29_cast_fp16)[name = tensor<string, []>("x_61_cast_fp16")];
            tensor<string, []> y_31_pad_type_0 = const()[name = tensor<string, []>("y_31_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_31_strides_0 = const()[name = tensor<string, []>("y_31_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_31_pad_0 = const()[name = tensor<string, []>("y_31_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_31_dilations_0 = const()[name = tensor<string, []>("y_31_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_31_groups_0 = const()[name = tensor<string, []>("y_31_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(57598144))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59957504))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_31_cast_fp16 = conv(dilations = y_31_dilations_0, groups = y_31_groups_0, pad = y_31_pad_0, pad_type = y_31_pad_type_0, strides = y_31_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = tensor<string, []>("y_31_cast_fp16")];
            tensor<int32, [3]> var_2600 = const()[name = tensor<string, []>("op_2600"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2601_cast_fp16 = transpose(perm = var_2600, x = y_31_cast_fp16)[name = tensor<string, []>("transpose_184")];
            tensor<fp16, [1, 1, 768]> input_115_cast_fp16 = add(x = input_113_cast_fp16, y = var_2601_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
            tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59959104)))];
            tensor<fp16, []> var_2605_to_fp16 = const()[name = tensor<string, []>("op_2605_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_2605_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(59960704))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61730240))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_32_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
            tensor<int32, [5]> var_2619 = const()[name = tensor<string, []>("op_2619"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_35_cast_fp16 = reshape(shape = var_2619, x = linear_32_cast_fp16)[name = tensor<string, []>("qkv_35_cast_fp16")];
            tensor<int32, [5]> q_17_begin_0 = const()[name = tensor<string, []>("q_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_17_end_0 = const()[name = tensor<string, []>("q_17_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_17_end_mask_0 = const()[name = tensor<string, []>("q_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_17_squeeze_mask_0 = const()[name = tensor<string, []>("q_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("q_17_cast_fp16")];
            tensor<int32, [5]> new_k_17_begin_0 = const()[name = tensor<string, []>("new_k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_17_end_0 = const()[name = tensor<string, []>("new_k_17_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_17_end_mask_0 = const()[name = tensor<string, []>("new_k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_k_17_cast_fp16")];
            tensor<int32, [5]> new_v_17_begin_0 = const()[name = tensor<string, []>("new_v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_17_end_0 = const()[name = tensor<string, []>("new_v_17_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_17_end_mask_0 = const()[name = tensor<string, []>("new_v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_v_17_cast_fp16")];
            tensor<string, []> sa_k_in_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_8_to_fp16 = cast(dtype = sa_k_in_8_to_fp16_dtype_0, x = sa_k_in_8)[name = tensor<string, []>("cast_42")];
            tensor<fp16, [1, 600, 12, 64]> var_2680_cast_fp16 = mul(x = sa_k_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2680_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2681_cast_fp16 = mul(x = new_k_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2681_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_17_cast_fp16 = add(x = var_2680_cast_fp16, y = var_2681_cast_fp16)[name = tensor<string, []>("sa_k_out_17_cast_fp16")];
            tensor<string, []> sa_k_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_8_to_fp16 = cast(dtype = sa_v_in_8_to_fp16_dtype_0, x = sa_v_in_8)[name = tensor<string, []>("cast_41")];
            tensor<fp16, [1, 600, 12, 64]> var_2687_cast_fp16 = mul(x = sa_v_in_8_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2687_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2688_cast_fp16 = mul(x = new_v_17_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2688_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_17_cast_fp16 = add(x = var_2687_cast_fp16, y = var_2688_cast_fp16)[name = tensor<string, []>("sa_v_out_17_cast_fp16")];
            tensor<string, []> sa_v_out_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2707 = const()[name = tensor<string, []>("op_2707"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2709_transpose_x_0 = const()[name = tensor<string, []>("op_2709_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2709_transpose_y_0 = const()[name = tensor<string, []>("op_2709_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_128_perm_0 = const()[name = tensor<string, []>("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_129_perm_0 = const()[name = tensor<string, []>("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_129 = transpose(perm = transpose_129_perm_0, x = sa_k_out_17_cast_fp16)[name = tensor<string, []>("transpose_182")];
            tensor<fp16, [1, 12, 1, 64]> transpose_128 = transpose(perm = transpose_128_perm_0, x = q_17_cast_fp16)[name = tensor<string, []>("transpose_183")];
            tensor<fp16, [1, 12, 1, 600]> var_2709_cast_fp16 = matmul(transpose_x = var_2709_transpose_x_0, transpose_y = var_2709_transpose_y_0, x = transpose_128, y = transpose_129)[name = tensor<string, []>("op_2709_cast_fp16")];
            tensor<fp16, []> var_2710_to_fp16 = const()[name = tensor<string, []>("op_2710_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_33_cast_fp16 = mul(x = var_2709_cast_fp16, y = var_2710_to_fp16)[name = tensor<string, []>("scores_33_cast_fp16")];
            tensor<fp16, []> var_2728_to_fp16 = const()[name = tensor<string, []>("op_2728_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_35_cast_fp16 = select(a = var_2728_to_fp16, b = scores_33_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_35_cast_fp16")];
            tensor<int32, []> var_2730 = const()[name = tensor<string, []>("op_2730"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_17_cast_fp16 = softmax(axis = var_2730, x = scores_35_cast_fp16)[name = tensor<string, []>("probs_17_cast_fp16")];
            tensor<bool, []> var_2733_transpose_x_0 = const()[name = tensor<string, []>("op_2733_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2733_transpose_y_0 = const()[name = tensor<string, []>("op_2733_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_17_cast_fp16 = transpose(perm = var_2707, x = sa_v_out_17_cast_fp16)[name = tensor<string, []>("transpose_181")];
            tensor<fp16, [1, 12, 1, 64]> var_2733_cast_fp16 = matmul(transpose_x = var_2733_transpose_x_0, transpose_y = var_2733_transpose_y_0, x = probs_17_cast_fp16, y = v_t_17_cast_fp16)[name = tensor<string, []>("op_2733_cast_fp16")];
            tensor<int32, [4]> var_2738 = const()[name = tensor<string, []>("op_2738"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2743 = const()[name = tensor<string, []>("op_2743"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2739_cast_fp16 = transpose(perm = var_2738, x = var_2733_cast_fp16)[name = tensor<string, []>("transpose_180")];
            tensor<fp16, [1, 1, 768]> input_119_cast_fp16 = reshape(shape = var_2743, x = var_2739_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(61734912))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62324800))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_33_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_121_cast_fp16 = add(x = input_115_cast_fp16, y = linear_33_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
            tensor<int32, [1]> input_123_axes_0 = const()[name = tensor<string, []>("input_123_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62326400)))];
            tensor<fp16, []> var_2751_to_fp16 = const()[name = tensor<string, []>("op_2751_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, epsilon = var_2751_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62328000))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62426368))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
            tensor<int32, [4]> var_2764 = const()[name = tensor<string, []>("op_2764"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_17_cast_fp16 = reshape(shape = var_2764, x = linear_34_cast_fp16)[name = tensor<string, []>("xq_proj_17_cast_fp16")];
            tensor<int32, [4]> var_2782 = const()[name = tensor<string, []>("op_2782"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_2784_transpose_x_0 = const()[name = tensor<string, []>("op_2784_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2784_transpose_y_0 = const()[name = tensor<string, []>("op_2784_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_8_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_8_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_130_perm_0 = const()[name = tensor<string, []>("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_131_perm_0 = const()[name = tensor<string, []>("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_8_to_fp16 = cast(dtype = xa_k_8_to_fp16_dtype_0, x = xa_k_8)[name = tensor<string, []>("cast_40")];
            tensor<fp16, [1, 1, 128, 256]> transpose_131 = transpose(perm = transpose_131_perm_0, x = xa_k_8_to_fp16)[name = tensor<string, []>("transpose_178")];
            tensor<fp16, [1, 1, 1, 128]> transpose_130 = transpose(perm = transpose_130_perm_0, x = xq_proj_17_cast_fp16)[name = tensor<string, []>("transpose_179")];
            tensor<fp16, [1, 1, 1, 256]> var_2784_cast_fp16 = matmul(transpose_x = var_2784_transpose_x_0, transpose_y = var_2784_transpose_y_0, x = transpose_130, y = transpose_131)[name = tensor<string, []>("op_2784_cast_fp16")];
            tensor<fp16, []> var_2785_to_fp16 = const()[name = tensor<string, []>("op_2785_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_33_cast_fp16 = mul(x = var_2784_cast_fp16, y = var_2785_to_fp16)[name = tensor<string, []>("xscores_33_cast_fp16")];
            tensor<fp16, []> var_2803_to_fp16 = const()[name = tensor<string, []>("op_2803_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_35_cast_fp16 = select(a = var_2803_to_fp16, b = xscores_33_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_35_cast_fp16")];
            tensor<int32, []> var_2805 = const()[name = tensor<string, []>("op_2805"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_17_cast_fp16 = softmax(axis = var_2805, x = xscores_35_cast_fp16)[name = tensor<string, []>("xprobs_17_cast_fp16")];
            tensor<bool, []> var_2808_transpose_x_0 = const()[name = tensor<string, []>("op_2808_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2808_transpose_y_0 = const()[name = tensor<string, []>("op_2808_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_8_to_fp16 = cast(dtype = xa_v_8_to_fp16_dtype_0, x = xa_v_8)[name = tensor<string, []>("cast_39")];
            tensor<fp16, [1, 1, 256, 128]> xvT_17_cast_fp16 = transpose(perm = var_2782, x = xa_v_8_to_fp16)[name = tensor<string, []>("transpose_177")];
            tensor<fp16, [1, 1, 1, 128]> var_2808_cast_fp16 = matmul(transpose_x = var_2808_transpose_x_0, transpose_y = var_2808_transpose_y_0, x = xprobs_17_cast_fp16, y = xvT_17_cast_fp16)[name = tensor<string, []>("op_2808_cast_fp16")];
            tensor<int32, [4]> var_2813 = const()[name = tensor<string, []>("op_2813"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_2818 = const()[name = tensor<string, []>("op_2818"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_2814_cast_fp16 = transpose(perm = var_2813, x = var_2808_cast_fp16)[name = tensor<string, []>("transpose_176")];
            tensor<fp16, [1, 1, 128]> input_125_cast_fp16 = reshape(shape = var_2818, x = var_2814_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62426688))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62525056))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_127_cast_fp16 = add(x = input_121_cast_fp16, y = linear_35_cast_fp16)[name = tensor<string, []>("input_127_cast_fp16")];
            tensor<int32, [1]> x_65_axes_0 = const()[name = tensor<string, []>("x_65_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62526656)))];
            tensor<fp16, []> var_2826_to_fp16 = const()[name = tensor<string, []>("op_2826_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_65_cast_fp16 = layer_norm(axes = x_65_axes_0, epsilon = var_2826_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_127_cast_fp16)[name = tensor<string, []>("x_65_cast_fp16")];
            tensor<int32, [3]> var_2842 = const()[name = tensor<string, []>("op_2842"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_33_pad_type_0 = const()[name = tensor<string, []>("y_33_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_33_strides_0 = const()[name = tensor<string, []>("y_33_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_33_pad_0 = const()[name = tensor<string, []>("y_33_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_33_dilations_0 = const()[name = tensor<string, []>("y_33_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_33_groups_0 = const()[name = tensor<string, []>("y_33_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62528256))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64887616))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_67_cast_fp16 = transpose(perm = var_2842, x = x_65_cast_fp16)[name = tensor<string, []>("transpose_175")];
            tensor<fp16, [1, 3072, 1]> y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_67_cast_fp16)[name = tensor<string, []>("y_33_cast_fp16")];
            tensor<string, []> x_69_mode_0 = const()[name = tensor<string, []>("x_69_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_69_cast_fp16 = gelu(mode = x_69_mode_0, x = y_33_cast_fp16)[name = tensor<string, []>("x_69_cast_fp16")];
            tensor<string, []> y_35_pad_type_0 = const()[name = tensor<string, []>("y_35_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_35_strides_0 = const()[name = tensor<string, []>("y_35_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_35_pad_0 = const()[name = tensor<string, []>("y_35_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_35_dilations_0 = const()[name = tensor<string, []>("y_35_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_35_groups_0 = const()[name = tensor<string, []>("y_35_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64893824))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67253184))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = tensor<string, []>("y_35_cast_fp16")];
            tensor<int32, [3]> var_2860 = const()[name = tensor<string, []>("op_2860"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_2861_cast_fp16 = transpose(perm = var_2860, x = y_35_cast_fp16)[name = tensor<string, []>("transpose_174")];
            tensor<fp16, [1, 1, 768]> input_129_cast_fp16 = add(x = input_127_cast_fp16, y = var_2861_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")];
            tensor<int32, [1]> input_131_axes_0 = const()[name = tensor<string, []>("input_131_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67254784)))];
            tensor<fp16, []> var_2865_to_fp16 = const()[name = tensor<string, []>("op_2865_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_131_cast_fp16 = layer_norm(axes = input_131_axes_0, epsilon = var_2865_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = input_129_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67256384))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69025920))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_36_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
            tensor<int32, [5]> var_2879 = const()[name = tensor<string, []>("op_2879"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_39_cast_fp16 = reshape(shape = var_2879, x = linear_36_cast_fp16)[name = tensor<string, []>("qkv_39_cast_fp16")];
            tensor<int32, [5]> q_19_begin_0 = const()[name = tensor<string, []>("q_19_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_19_end_0 = const()[name = tensor<string, []>("q_19_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_19_end_mask_0 = const()[name = tensor<string, []>("q_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_19_squeeze_mask_0 = const()[name = tensor<string, []>("q_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_19_cast_fp16 = slice_by_index(begin = q_19_begin_0, end = q_19_end_0, end_mask = q_19_end_mask_0, squeeze_mask = q_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("q_19_cast_fp16")];
            tensor<int32, [5]> new_k_19_begin_0 = const()[name = tensor<string, []>("new_k_19_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_19_end_0 = const()[name = tensor<string, []>("new_k_19_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_19_end_mask_0 = const()[name = tensor<string, []>("new_k_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_k_19_cast_fp16")];
            tensor<int32, [5]> new_v_19_begin_0 = const()[name = tensor<string, []>("new_v_19_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_19_end_0 = const()[name = tensor<string, []>("new_v_19_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_19_end_mask_0 = const()[name = tensor<string, []>("new_v_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_v_19_cast_fp16")];
            tensor<string, []> sa_k_in_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_9_to_fp16 = cast(dtype = sa_k_in_9_to_fp16_dtype_0, x = sa_k_in_9)[name = tensor<string, []>("cast_38")];
            tensor<fp16, [1, 600, 12, 64]> var_2940_cast_fp16 = mul(x = sa_k_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2940_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2941_cast_fp16 = mul(x = new_k_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2941_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_19_cast_fp16 = add(x = var_2940_cast_fp16, y = var_2941_cast_fp16)[name = tensor<string, []>("sa_k_out_19_cast_fp16")];
            tensor<string, []> sa_k_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_9_to_fp16 = cast(dtype = sa_v_in_9_to_fp16_dtype_0, x = sa_v_in_9)[name = tensor<string, []>("cast_37")];
            tensor<fp16, [1, 600, 12, 64]> var_2947_cast_fp16 = mul(x = sa_v_in_9_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_2947_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_2948_cast_fp16 = mul(x = new_v_19_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_2948_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_19_cast_fp16 = add(x = var_2947_cast_fp16, y = var_2948_cast_fp16)[name = tensor<string, []>("sa_v_out_19_cast_fp16")];
            tensor<string, []> sa_v_out_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_2967 = const()[name = tensor<string, []>("op_2967"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_2969_transpose_x_0 = const()[name = tensor<string, []>("op_2969_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2969_transpose_y_0 = const()[name = tensor<string, []>("op_2969_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_132_perm_0 = const()[name = tensor<string, []>("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_133_perm_0 = const()[name = tensor<string, []>("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_133 = transpose(perm = transpose_133_perm_0, x = sa_k_out_19_cast_fp16)[name = tensor<string, []>("transpose_172")];
            tensor<fp16, [1, 12, 1, 64]> transpose_132 = transpose(perm = transpose_132_perm_0, x = q_19_cast_fp16)[name = tensor<string, []>("transpose_173")];
            tensor<fp16, [1, 12, 1, 600]> var_2969_cast_fp16 = matmul(transpose_x = var_2969_transpose_x_0, transpose_y = var_2969_transpose_y_0, x = transpose_132, y = transpose_133)[name = tensor<string, []>("op_2969_cast_fp16")];
            tensor<fp16, []> var_2970_to_fp16 = const()[name = tensor<string, []>("op_2970_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_37_cast_fp16 = mul(x = var_2969_cast_fp16, y = var_2970_to_fp16)[name = tensor<string, []>("scores_37_cast_fp16")];
            tensor<fp16, []> var_2988_to_fp16 = const()[name = tensor<string, []>("op_2988_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_39_cast_fp16 = select(a = var_2988_to_fp16, b = scores_37_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_39_cast_fp16")];
            tensor<int32, []> var_2990 = const()[name = tensor<string, []>("op_2990"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_19_cast_fp16 = softmax(axis = var_2990, x = scores_39_cast_fp16)[name = tensor<string, []>("probs_19_cast_fp16")];
            tensor<bool, []> var_2993_transpose_x_0 = const()[name = tensor<string, []>("op_2993_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_2993_transpose_y_0 = const()[name = tensor<string, []>("op_2993_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_19_cast_fp16 = transpose(perm = var_2967, x = sa_v_out_19_cast_fp16)[name = tensor<string, []>("transpose_171")];
            tensor<fp16, [1, 12, 1, 64]> var_2993_cast_fp16 = matmul(transpose_x = var_2993_transpose_x_0, transpose_y = var_2993_transpose_y_0, x = probs_19_cast_fp16, y = v_t_19_cast_fp16)[name = tensor<string, []>("op_2993_cast_fp16")];
            tensor<int32, [4]> var_2998 = const()[name = tensor<string, []>("op_2998"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3003 = const()[name = tensor<string, []>("op_3003"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_2999_cast_fp16 = transpose(perm = var_2998, x = var_2993_cast_fp16)[name = tensor<string, []>("transpose_170")];
            tensor<fp16, [1, 1, 768]> input_133_cast_fp16 = reshape(shape = var_3003, x = var_2999_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69030592))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69620480))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_133_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_135_cast_fp16 = add(x = input_129_cast_fp16, y = linear_37_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
            tensor<int32, [1]> input_137_axes_0 = const()[name = tensor<string, []>("input_137_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69622080)))];
            tensor<fp16, []> var_3011_to_fp16 = const()[name = tensor<string, []>("op_3011_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, epsilon = var_3011_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_135_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69623680))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69722048))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = tensor<string, []>("linear_38_cast_fp16")];
            tensor<int32, [4]> var_3024 = const()[name = tensor<string, []>("op_3024"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_19_cast_fp16 = reshape(shape = var_3024, x = linear_38_cast_fp16)[name = tensor<string, []>("xq_proj_19_cast_fp16")];
            tensor<int32, [4]> var_3042 = const()[name = tensor<string, []>("op_3042"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3044_transpose_x_0 = const()[name = tensor<string, []>("op_3044_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3044_transpose_y_0 = const()[name = tensor<string, []>("op_3044_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_9_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_9_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_134_perm_0 = const()[name = tensor<string, []>("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_135_perm_0 = const()[name = tensor<string, []>("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_9_to_fp16 = cast(dtype = xa_k_9_to_fp16_dtype_0, x = xa_k_9)[name = tensor<string, []>("cast_36")];
            tensor<fp16, [1, 1, 128, 256]> transpose_135 = transpose(perm = transpose_135_perm_0, x = xa_k_9_to_fp16)[name = tensor<string, []>("transpose_168")];
            tensor<fp16, [1, 1, 1, 128]> transpose_134 = transpose(perm = transpose_134_perm_0, x = xq_proj_19_cast_fp16)[name = tensor<string, []>("transpose_169")];
            tensor<fp16, [1, 1, 1, 256]> var_3044_cast_fp16 = matmul(transpose_x = var_3044_transpose_x_0, transpose_y = var_3044_transpose_y_0, x = transpose_134, y = transpose_135)[name = tensor<string, []>("op_3044_cast_fp16")];
            tensor<fp16, []> var_3045_to_fp16 = const()[name = tensor<string, []>("op_3045_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_37_cast_fp16 = mul(x = var_3044_cast_fp16, y = var_3045_to_fp16)[name = tensor<string, []>("xscores_37_cast_fp16")];
            tensor<fp16, []> var_3063_to_fp16 = const()[name = tensor<string, []>("op_3063_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_39_cast_fp16 = select(a = var_3063_to_fp16, b = xscores_37_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_39_cast_fp16")];
            tensor<int32, []> var_3065 = const()[name = tensor<string, []>("op_3065"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_19_cast_fp16 = softmax(axis = var_3065, x = xscores_39_cast_fp16)[name = tensor<string, []>("xprobs_19_cast_fp16")];
            tensor<bool, []> var_3068_transpose_x_0 = const()[name = tensor<string, []>("op_3068_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3068_transpose_y_0 = const()[name = tensor<string, []>("op_3068_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_9_to_fp16 = cast(dtype = xa_v_9_to_fp16_dtype_0, x = xa_v_9)[name = tensor<string, []>("cast_35")];
            tensor<fp16, [1, 1, 256, 128]> xvT_19_cast_fp16 = transpose(perm = var_3042, x = xa_v_9_to_fp16)[name = tensor<string, []>("transpose_167")];
            tensor<fp16, [1, 1, 1, 128]> var_3068_cast_fp16 = matmul(transpose_x = var_3068_transpose_x_0, transpose_y = var_3068_transpose_y_0, x = xprobs_19_cast_fp16, y = xvT_19_cast_fp16)[name = tensor<string, []>("op_3068_cast_fp16")];
            tensor<int32, [4]> var_3073 = const()[name = tensor<string, []>("op_3073"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3078 = const()[name = tensor<string, []>("op_3078"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3074_cast_fp16 = transpose(perm = var_3073, x = var_3068_cast_fp16)[name = tensor<string, []>("transpose_166")];
            tensor<fp16, [1, 1, 128]> input_139_cast_fp16 = reshape(shape = var_3078, x = var_3074_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69722368))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69820736))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_141_cast_fp16 = add(x = input_135_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = tensor<string, []>("x_73_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69822336)))];
            tensor<fp16, []> var_3086_to_fp16 = const()[name = tensor<string, []>("op_3086_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_73_cast_fp16 = layer_norm(axes = x_73_axes_0, epsilon = var_3086_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_141_cast_fp16)[name = tensor<string, []>("x_73_cast_fp16")];
            tensor<int32, [3]> var_3102 = const()[name = tensor<string, []>("op_3102"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_37_pad_type_0 = const()[name = tensor<string, []>("y_37_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_37_strides_0 = const()[name = tensor<string, []>("y_37_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_37_pad_0 = const()[name = tensor<string, []>("y_37_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_37_dilations_0 = const()[name = tensor<string, []>("y_37_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_37_groups_0 = const()[name = tensor<string, []>("y_37_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69823936))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72183296))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_75_cast_fp16 = transpose(perm = var_3102, x = x_73_cast_fp16)[name = tensor<string, []>("transpose_165")];
            tensor<fp16, [1, 3072, 1]> y_37_cast_fp16 = conv(dilations = y_37_dilations_0, groups = y_37_groups_0, pad = y_37_pad_0, pad_type = y_37_pad_type_0, strides = y_37_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = tensor<string, []>("y_37_cast_fp16")];
            tensor<string, []> x_77_mode_0 = const()[name = tensor<string, []>("x_77_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_37_cast_fp16)[name = tensor<string, []>("x_77_cast_fp16")];
            tensor<string, []> y_39_pad_type_0 = const()[name = tensor<string, []>("y_39_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_39_strides_0 = const()[name = tensor<string, []>("y_39_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_39_pad_0 = const()[name = tensor<string, []>("y_39_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_39_dilations_0 = const()[name = tensor<string, []>("y_39_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_39_groups_0 = const()[name = tensor<string, []>("y_39_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(72189504))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74548864))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = tensor<string, []>("y_39_cast_fp16")];
            tensor<int32, [3]> var_3120 = const()[name = tensor<string, []>("op_3120"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3121_cast_fp16 = transpose(perm = var_3120, x = y_39_cast_fp16)[name = tensor<string, []>("transpose_164")];
            tensor<fp16, [1, 1, 768]> input_143_cast_fp16 = add(x = input_141_cast_fp16, y = var_3121_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
            tensor<int32, [1]> input_145_axes_0 = const()[name = tensor<string, []>("input_145_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74550464)))];
            tensor<fp16, []> var_3125_to_fp16 = const()[name = tensor<string, []>("op_3125_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_145_cast_fp16 = layer_norm(axes = input_145_axes_0, epsilon = var_3125_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = input_143_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(74552064))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76321600))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")];
            tensor<int32, [5]> var_3139 = const()[name = tensor<string, []>("op_3139"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_43_cast_fp16 = reshape(shape = var_3139, x = linear_40_cast_fp16)[name = tensor<string, []>("qkv_43_cast_fp16")];
            tensor<int32, [5]> q_21_begin_0 = const()[name = tensor<string, []>("q_21_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_21_end_0 = const()[name = tensor<string, []>("q_21_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_21_end_mask_0 = const()[name = tensor<string, []>("q_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_21_squeeze_mask_0 = const()[name = tensor<string, []>("q_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_21_cast_fp16 = slice_by_index(begin = q_21_begin_0, end = q_21_end_0, end_mask = q_21_end_mask_0, squeeze_mask = q_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("q_21_cast_fp16")];
            tensor<int32, [5]> new_k_21_begin_0 = const()[name = tensor<string, []>("new_k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_21_end_0 = const()[name = tensor<string, []>("new_k_21_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_21_end_mask_0 = const()[name = tensor<string, []>("new_k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_k_21_cast_fp16")];
            tensor<int32, [5]> new_v_21_begin_0 = const()[name = tensor<string, []>("new_v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_21_end_0 = const()[name = tensor<string, []>("new_v_21_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_21_end_mask_0 = const()[name = tensor<string, []>("new_v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_v_21_cast_fp16")];
            tensor<string, []> sa_k_in_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_10_to_fp16 = cast(dtype = sa_k_in_10_to_fp16_dtype_0, x = sa_k_in_10)[name = tensor<string, []>("cast_34")];
            tensor<fp16, [1, 600, 12, 64]> var_3200_cast_fp16 = mul(x = sa_k_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3200_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3201_cast_fp16 = mul(x = new_k_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3201_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_21_cast_fp16 = add(x = var_3200_cast_fp16, y = var_3201_cast_fp16)[name = tensor<string, []>("sa_k_out_21_cast_fp16")];
            tensor<string, []> sa_k_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_10_to_fp16 = cast(dtype = sa_v_in_10_to_fp16_dtype_0, x = sa_v_in_10)[name = tensor<string, []>("cast_33")];
            tensor<fp16, [1, 600, 12, 64]> var_3207_cast_fp16 = mul(x = sa_v_in_10_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3207_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3208_cast_fp16 = mul(x = new_v_21_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3208_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_21_cast_fp16 = add(x = var_3207_cast_fp16, y = var_3208_cast_fp16)[name = tensor<string, []>("sa_v_out_21_cast_fp16")];
            tensor<string, []> sa_v_out_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_3227 = const()[name = tensor<string, []>("op_3227"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_3229_transpose_x_0 = const()[name = tensor<string, []>("op_3229_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3229_transpose_y_0 = const()[name = tensor<string, []>("op_3229_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_136_perm_0 = const()[name = tensor<string, []>("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_137_perm_0 = const()[name = tensor<string, []>("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_137 = transpose(perm = transpose_137_perm_0, x = sa_k_out_21_cast_fp16)[name = tensor<string, []>("transpose_162")];
            tensor<fp16, [1, 12, 1, 64]> transpose_136 = transpose(perm = transpose_136_perm_0, x = q_21_cast_fp16)[name = tensor<string, []>("transpose_163")];
            tensor<fp16, [1, 12, 1, 600]> var_3229_cast_fp16 = matmul(transpose_x = var_3229_transpose_x_0, transpose_y = var_3229_transpose_y_0, x = transpose_136, y = transpose_137)[name = tensor<string, []>("op_3229_cast_fp16")];
            tensor<fp16, []> var_3230_to_fp16 = const()[name = tensor<string, []>("op_3230_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_41_cast_fp16 = mul(x = var_3229_cast_fp16, y = var_3230_to_fp16)[name = tensor<string, []>("scores_41_cast_fp16")];
            tensor<fp16, []> var_3248_to_fp16 = const()[name = tensor<string, []>("op_3248_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_43_cast_fp16 = select(a = var_3248_to_fp16, b = scores_41_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_43_cast_fp16")];
            tensor<int32, []> var_3250 = const()[name = tensor<string, []>("op_3250"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_21_cast_fp16 = softmax(axis = var_3250, x = scores_43_cast_fp16)[name = tensor<string, []>("probs_21_cast_fp16")];
            tensor<bool, []> var_3253_transpose_x_0 = const()[name = tensor<string, []>("op_3253_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3253_transpose_y_0 = const()[name = tensor<string, []>("op_3253_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_21_cast_fp16 = transpose(perm = var_3227, x = sa_v_out_21_cast_fp16)[name = tensor<string, []>("transpose_161")];
            tensor<fp16, [1, 12, 1, 64]> var_3253_cast_fp16 = matmul(transpose_x = var_3253_transpose_x_0, transpose_y = var_3253_transpose_y_0, x = probs_21_cast_fp16, y = v_t_21_cast_fp16)[name = tensor<string, []>("op_3253_cast_fp16")];
            tensor<int32, [4]> var_3258 = const()[name = tensor<string, []>("op_3258"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3263 = const()[name = tensor<string, []>("op_3263"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_3259_cast_fp16 = transpose(perm = var_3258, x = var_3253_cast_fp16)[name = tensor<string, []>("transpose_160")];
            tensor<fp16, [1, 1, 768]> input_147_cast_fp16 = reshape(shape = var_3263, x = var_3259_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76326272))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76916160))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_149_cast_fp16 = add(x = input_143_cast_fp16, y = linear_41_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
            tensor<int32, [1]> input_151_axes_0 = const()[name = tensor<string, []>("input_151_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76917760)))];
            tensor<fp16, []> var_3271_to_fp16 = const()[name = tensor<string, []>("op_3271_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_151_cast_fp16 = layer_norm(axes = input_151_axes_0, epsilon = var_3271_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_149_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(76919360))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77017728))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")];
            tensor<int32, [4]> var_3284 = const()[name = tensor<string, []>("op_3284"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_21_cast_fp16 = reshape(shape = var_3284, x = linear_42_cast_fp16)[name = tensor<string, []>("xq_proj_21_cast_fp16")];
            tensor<int32, [4]> var_3302 = const()[name = tensor<string, []>("op_3302"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3304_transpose_x_0 = const()[name = tensor<string, []>("op_3304_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3304_transpose_y_0 = const()[name = tensor<string, []>("op_3304_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_10_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_10_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_138_perm_0 = const()[name = tensor<string, []>("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_139_perm_0 = const()[name = tensor<string, []>("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_10_to_fp16 = cast(dtype = xa_k_10_to_fp16_dtype_0, x = xa_k_10)[name = tensor<string, []>("cast_32")];
            tensor<fp16, [1, 1, 128, 256]> transpose_139 = transpose(perm = transpose_139_perm_0, x = xa_k_10_to_fp16)[name = tensor<string, []>("transpose_158")];
            tensor<fp16, [1, 1, 1, 128]> transpose_138 = transpose(perm = transpose_138_perm_0, x = xq_proj_21_cast_fp16)[name = tensor<string, []>("transpose_159")];
            tensor<fp16, [1, 1, 1, 256]> var_3304_cast_fp16 = matmul(transpose_x = var_3304_transpose_x_0, transpose_y = var_3304_transpose_y_0, x = transpose_138, y = transpose_139)[name = tensor<string, []>("op_3304_cast_fp16")];
            tensor<fp16, []> var_3305_to_fp16 = const()[name = tensor<string, []>("op_3305_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_41_cast_fp16 = mul(x = var_3304_cast_fp16, y = var_3305_to_fp16)[name = tensor<string, []>("xscores_41_cast_fp16")];
            tensor<fp16, []> var_3323_to_fp16 = const()[name = tensor<string, []>("op_3323_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_43_cast_fp16 = select(a = var_3323_to_fp16, b = xscores_41_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_43_cast_fp16")];
            tensor<int32, []> var_3325 = const()[name = tensor<string, []>("op_3325"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_21_cast_fp16 = softmax(axis = var_3325, x = xscores_43_cast_fp16)[name = tensor<string, []>("xprobs_21_cast_fp16")];
            tensor<bool, []> var_3328_transpose_x_0 = const()[name = tensor<string, []>("op_3328_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3328_transpose_y_0 = const()[name = tensor<string, []>("op_3328_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_10_to_fp16 = cast(dtype = xa_v_10_to_fp16_dtype_0, x = xa_v_10)[name = tensor<string, []>("cast_31")];
            tensor<fp16, [1, 1, 256, 128]> xvT_21_cast_fp16 = transpose(perm = var_3302, x = xa_v_10_to_fp16)[name = tensor<string, []>("transpose_157")];
            tensor<fp16, [1, 1, 1, 128]> var_3328_cast_fp16 = matmul(transpose_x = var_3328_transpose_x_0, transpose_y = var_3328_transpose_y_0, x = xprobs_21_cast_fp16, y = xvT_21_cast_fp16)[name = tensor<string, []>("op_3328_cast_fp16")];
            tensor<int32, [4]> var_3333 = const()[name = tensor<string, []>("op_3333"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3338 = const()[name = tensor<string, []>("op_3338"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3334_cast_fp16 = transpose(perm = var_3333, x = var_3328_cast_fp16)[name = tensor<string, []>("transpose_156")];
            tensor<fp16, [1, 1, 128]> input_153_cast_fp16 = reshape(shape = var_3338, x = var_3334_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77018048))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77116416))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor<string, []>("linear_43_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_155_cast_fp16 = add(x = input_149_cast_fp16, y = linear_43_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")];
            tensor<int32, [1]> x_81_axes_0 = const()[name = tensor<string, []>("x_81_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77118016)))];
            tensor<fp16, []> var_3346_to_fp16 = const()[name = tensor<string, []>("op_3346_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_81_cast_fp16 = layer_norm(axes = x_81_axes_0, epsilon = var_3346_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_155_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")];
            tensor<int32, [3]> var_3362 = const()[name = tensor<string, []>("op_3362"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_41_pad_type_0 = const()[name = tensor<string, []>("y_41_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_41_strides_0 = const()[name = tensor<string, []>("y_41_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_41_pad_0 = const()[name = tensor<string, []>("y_41_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_41_dilations_0 = const()[name = tensor<string, []>("y_41_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_41_groups_0 = const()[name = tensor<string, []>("y_41_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77119616))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79478976))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_83_cast_fp16 = transpose(perm = var_3362, x = x_81_cast_fp16)[name = tensor<string, []>("transpose_155")];
            tensor<fp16, [1, 3072, 1]> y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_83_cast_fp16)[name = tensor<string, []>("y_41_cast_fp16")];
            tensor<string, []> x_85_mode_0 = const()[name = tensor<string, []>("x_85_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_85_cast_fp16 = gelu(mode = x_85_mode_0, x = y_41_cast_fp16)[name = tensor<string, []>("x_85_cast_fp16")];
            tensor<string, []> y_43_pad_type_0 = const()[name = tensor<string, []>("y_43_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_43_strides_0 = const()[name = tensor<string, []>("y_43_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_43_pad_0 = const()[name = tensor<string, []>("y_43_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_43_dilations_0 = const()[name = tensor<string, []>("y_43_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_43_groups_0 = const()[name = tensor<string, []>("y_43_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(79485184))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81844544))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_43_cast_fp16 = conv(dilations = y_43_dilations_0, groups = y_43_groups_0, pad = y_43_pad_0, pad_type = y_43_pad_type_0, strides = y_43_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = tensor<string, []>("y_43_cast_fp16")];
            tensor<int32, [3]> var_3380 = const()[name = tensor<string, []>("op_3380"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3381_cast_fp16 = transpose(perm = var_3380, x = y_43_cast_fp16)[name = tensor<string, []>("transpose_154")];
            tensor<fp16, [1, 1, 768]> input_157_cast_fp16 = add(x = input_155_cast_fp16, y = var_3381_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")];
            tensor<int32, [1]> input_159_axes_0 = const()[name = tensor<string, []>("input_159_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81846144)))];
            tensor<fp16, []> var_3385_to_fp16 = const()[name = tensor<string, []>("op_3385_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_159_cast_fp16 = layer_norm(axes = input_159_axes_0, epsilon = var_3385_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")];
            tensor<fp16, [2304, 768]> dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(81847744))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83617280))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3351680)))];
            tensor<fp16, [1, 1, 2304]> linear_44_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")];
            tensor<int32, [5]> var_3399 = const()[name = tensor<string, []>("op_3399"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<fp16, [1, 1, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_3399, x = linear_44_cast_fp16)[name = tensor<string, []>("qkv_cast_fp16")];
            tensor<int32, [5]> q_begin_0 = const()[name = tensor<string, []>("q_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])];
            tensor<int32, [5]> q_end_0 = const()[name = tensor<string, []>("q_end_0"), val = tensor<int32, [5]>([1, 1, 1, 12, 64])];
            tensor<bool, [5]> q_end_mask_0 = const()[name = tensor<string, []>("q_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> q_squeeze_mask_0 = const()[name = tensor<string, []>("q_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> q_cast_fp16 = slice_by_index(begin = q_begin_0, end = q_end_0, end_mask = q_end_mask_0, squeeze_mask = q_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("q_cast_fp16")];
            tensor<int32, [5]> new_k_begin_0 = const()[name = tensor<string, []>("new_k_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])];
            tensor<int32, [5]> new_k_end_0 = const()[name = tensor<string, []>("new_k_end_0"), val = tensor<int32, [5]>([1, 1, 2, 12, 64])];
            tensor<bool, [5]> new_k_end_mask_0 = const()[name = tensor<string, []>("new_k_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_k_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_k_cast_fp16")];
            tensor<int32, [5]> new_v_begin_0 = const()[name = tensor<string, []>("new_v_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])];
            tensor<int32, [5]> new_v_end_0 = const()[name = tensor<string, []>("new_v_end_0"), val = tensor<int32, [5]>([1, 1, 3, 12, 64])];
            tensor<bool, [5]> new_v_end_mask_0 = const()[name = tensor<string, []>("new_v_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])];
            tensor<bool, [5]> new_v_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])];
            tensor<fp16, [1, 1, 12, 64]> new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_v_cast_fp16")];
            tensor<string, []> sa_k_in_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_k_in_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_in_11_to_fp16 = cast(dtype = sa_k_in_11_to_fp16_dtype_0, x = sa_k_in_11)[name = tensor<string, []>("cast_30")];
            tensor<fp16, [1, 600, 12, 64]> var_3460_cast_fp16 = mul(x = sa_k_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3460_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3461_cast_fp16 = mul(x = new_k_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3461_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_k_out_cast_fp16 = add(x = var_3460_cast_fp16, y = var_3461_cast_fp16)[name = tensor<string, []>("sa_k_out_cast_fp16")];
            tensor<string, []> sa_k_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_k_out_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<string, []> sa_v_in_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("sa_v_in_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_in_11_to_fp16 = cast(dtype = sa_v_in_11_to_fp16_dtype_0, x = sa_v_in_11)[name = tensor<string, []>("cast_29")];
            tensor<fp16, [1, 600, 12, 64]> var_3467_cast_fp16 = mul(x = sa_v_in_11_to_fp16, y = var_599_cast_fp16)[name = tensor<string, []>("op_3467_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> var_3468_cast_fp16 = mul(x = new_v_cast_fp16, y = write_oh_b_1_cast_fp16)[name = tensor<string, []>("op_3468_cast_fp16")];
            tensor<fp16, [1, 600, 12, 64]> sa_v_out_cast_fp16 = add(x = var_3467_cast_fp16, y = var_3468_cast_fp16)[name = tensor<string, []>("sa_v_out_cast_fp16")];
            tensor<string, []> sa_v_out_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("sa_v_out_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<int32, [4]> var_3487 = const()[name = tensor<string, []>("op_3487"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<bool, []> var_3489_transpose_x_0 = const()[name = tensor<string, []>("op_3489_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3489_transpose_y_0 = const()[name = tensor<string, []>("op_3489_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<int32, [4]> transpose_140_perm_0 = const()[name = tensor<string, []>("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_141_perm_0 = const()[name = tensor<string, []>("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 12, 64, 600]> transpose_141 = transpose(perm = transpose_141_perm_0, x = sa_k_out_cast_fp16)[name = tensor<string, []>("transpose_152")];
            tensor<fp16, [1, 12, 1, 64]> transpose_140 = transpose(perm = transpose_140_perm_0, x = q_cast_fp16)[name = tensor<string, []>("transpose_153")];
            tensor<fp16, [1, 12, 1, 600]> var_3489_cast_fp16 = matmul(transpose_x = var_3489_transpose_x_0, transpose_y = var_3489_transpose_y_0, x = transpose_140, y = transpose_141)[name = tensor<string, []>("op_3489_cast_fp16")];
            tensor<fp16, []> var_3490_to_fp16 = const()[name = tensor<string, []>("op_3490_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
            tensor<fp16, [1, 12, 1, 600]> scores_45_cast_fp16 = mul(x = var_3489_cast_fp16, y = var_3490_to_fp16)[name = tensor<string, []>("scores_45_cast_fp16")];
            tensor<fp16, []> var_3508_to_fp16 = const()[name = tensor<string, []>("op_3508_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 12, 1, 600]> scores_cast_fp16 = select(a = var_3508_to_fp16, b = scores_45_cast_fp16, cond = var_647_cast_fp16)[name = tensor<string, []>("scores_cast_fp16")];
            tensor<int32, []> var_3510 = const()[name = tensor<string, []>("op_3510"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 12, 1, 600]> probs_cast_fp16 = softmax(axis = var_3510, x = scores_cast_fp16)[name = tensor<string, []>("probs_cast_fp16")];
            tensor<bool, []> var_3513_transpose_x_0 = const()[name = tensor<string, []>("op_3513_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3513_transpose_y_0 = const()[name = tensor<string, []>("op_3513_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 12, 600, 64]> v_t_cast_fp16 = transpose(perm = var_3487, x = sa_v_out_cast_fp16)[name = tensor<string, []>("transpose_151")];
            tensor<fp16, [1, 12, 1, 64]> var_3513_cast_fp16 = matmul(transpose_x = var_3513_transpose_x_0, transpose_y = var_3513_transpose_y_0, x = probs_cast_fp16, y = v_t_cast_fp16)[name = tensor<string, []>("op_3513_cast_fp16")];
            tensor<int32, [4]> var_3518 = const()[name = tensor<string, []>("op_3518"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3523 = const()[name = tensor<string, []>("op_3523"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 12, 64]> var_3519_cast_fp16 = transpose(perm = var_3518, x = var_3513_cast_fp16)[name = tensor<string, []>("transpose_150")];
            tensor<fp16, [1, 1, 768]> input_161_cast_fp16 = reshape(shape = var_3523, x = var_3519_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")];
            tensor<fp16, [768, 768]> dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(83621952))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84211840))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_163_cast_fp16 = add(x = input_157_cast_fp16, y = linear_45_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
            tensor<int32, [1]> input_165_axes_0 = const()[name = tensor<string, []>("input_165_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84213440)))];
            tensor<fp16, []> var_3531_to_fp16 = const()[name = tensor<string, []>("op_3531_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_165_cast_fp16 = layer_norm(axes = input_165_axes_0, epsilon = var_3531_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_163_cast_fp16)[name = tensor<string, []>("input_165_cast_fp16")];
            tensor<fp16, [128, 768]> dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84215040))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84313408))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4057280)))];
            tensor<fp16, [1, 1, 128]> linear_46_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = input_165_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")];
            tensor<int32, [4]> var_3544 = const()[name = tensor<string, []>("op_3544"), val = tensor<int32, [4]>([1, 1, 1, 128])];
            tensor<fp16, [1, 1, 1, 128]> xq_proj_cast_fp16 = reshape(shape = var_3544, x = linear_46_cast_fp16)[name = tensor<string, []>("xq_proj_cast_fp16")];
            tensor<int32, [4]> var_3562 = const()[name = tensor<string, []>("op_3562"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<string, []> xa_v_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_v_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<bool, []> var_3564_transpose_x_0 = const()[name = tensor<string, []>("op_3564_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3564_transpose_y_0 = const()[name = tensor<string, []>("op_3564_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<string, []> xa_k_11_to_fp16_dtype_0 = const()[name = tensor<string, []>("xa_k_11_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
            tensor<int32, [4]> transpose_142_perm_0 = const()[name = tensor<string, []>("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
            tensor<int32, [4]> transpose_143_perm_0 = const()[name = tensor<string, []>("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
            tensor<fp16, [1, 256, 1, 128]> xa_k_11_to_fp16 = cast(dtype = xa_k_11_to_fp16_dtype_0, x = xa_k_11)[name = tensor<string, []>("cast_28")];
            tensor<fp16, [1, 1, 128, 256]> transpose_143 = transpose(perm = transpose_143_perm_0, x = xa_k_11_to_fp16)[name = tensor<string, []>("transpose_148")];
            tensor<fp16, [1, 1, 1, 128]> transpose_142 = transpose(perm = transpose_142_perm_0, x = xq_proj_cast_fp16)[name = tensor<string, []>("transpose_149")];
            tensor<fp16, [1, 1, 1, 256]> var_3564_cast_fp16 = matmul(transpose_x = var_3564_transpose_x_0, transpose_y = var_3564_transpose_y_0, x = transpose_142, y = transpose_143)[name = tensor<string, []>("op_3564_cast_fp16")];
            tensor<fp16, []> var_3565_to_fp16 = const()[name = tensor<string, []>("op_3565_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)];
            tensor<fp16, [1, 1, 1, 256]> xscores_45_cast_fp16 = mul(x = var_3564_cast_fp16, y = var_3565_to_fp16)[name = tensor<string, []>("xscores_45_cast_fp16")];
            tensor<fp16, []> var_3583_to_fp16 = const()[name = tensor<string, []>("op_3583_to_fp16"), val = tensor<fp16, []>(-inf)];
            tensor<fp16, [1, 1, 1, 256]> xscores_cast_fp16 = select(a = var_3583_to_fp16, b = xscores_45_cast_fp16, cond = var_722_cast_fp16)[name = tensor<string, []>("xscores_cast_fp16")];
            tensor<int32, []> var_3585 = const()[name = tensor<string, []>("op_3585"), val = tensor<int32, []>(-1)];
            tensor<fp16, [1, 1, 1, 256]> xprobs_cast_fp16 = softmax(axis = var_3585, x = xscores_cast_fp16)[name = tensor<string, []>("xprobs_cast_fp16")];
            tensor<bool, []> var_3588_transpose_x_0 = const()[name = tensor<string, []>("op_3588_transpose_x_0"), val = tensor<bool, []>(false)];
            tensor<bool, []> var_3588_transpose_y_0 = const()[name = tensor<string, []>("op_3588_transpose_y_0"), val = tensor<bool, []>(false)];
            tensor<fp16, [1, 256, 1, 128]> xa_v_11_to_fp16 = cast(dtype = xa_v_11_to_fp16_dtype_0, x = xa_v_11)[name = tensor<string, []>("cast_27")];
            tensor<fp16, [1, 1, 256, 128]> xvT_cast_fp16 = transpose(perm = var_3562, x = xa_v_11_to_fp16)[name = tensor<string, []>("transpose_147")];
            tensor<fp16, [1, 1, 1, 128]> var_3588_cast_fp16 = matmul(transpose_x = var_3588_transpose_x_0, transpose_y = var_3588_transpose_y_0, x = xprobs_cast_fp16, y = xvT_cast_fp16)[name = tensor<string, []>("op_3588_cast_fp16")];
            tensor<int32, [4]> var_3593 = const()[name = tensor<string, []>("op_3593"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_3598 = const()[name = tensor<string, []>("op_3598"), val = tensor<int32, [3]>([1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> var_3594_cast_fp16 = transpose(perm = var_3593, x = var_3588_cast_fp16)[name = tensor<string, []>("transpose_146")];
            tensor<fp16, [1, 1, 128]> input_167_cast_fp16 = reshape(shape = var_3598, x = var_3594_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
            tensor<fp16, [768, 128]> dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84313728))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84412096))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 1, 768]> linear_47_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")];
            tensor<fp16, [1, 1, 768]> input_169_cast_fp16 = add(x = input_163_cast_fp16, y = linear_47_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
            tensor<int32, [1]> x_89_axes_0 = const()[name = tensor<string, []>("x_89_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84413696)))];
            tensor<fp16, []> var_3606_to_fp16 = const()[name = tensor<string, []>("op_3606_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> x_89_cast_fp16 = layer_norm(axes = x_89_axes_0, epsilon = var_3606_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_169_cast_fp16)[name = tensor<string, []>("x_89_cast_fp16")];
            tensor<int32, [3]> var_3622 = const()[name = tensor<string, []>("op_3622"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<string, []> y_45_pad_type_0 = const()[name = tensor<string, []>("y_45_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_45_strides_0 = const()[name = tensor<string, []>("y_45_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_45_pad_0 = const()[name = tensor<string, []>("y_45_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_45_dilations_0 = const()[name = tensor<string, []>("y_45_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_45_groups_0 = const()[name = tensor<string, []>("y_45_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [3072, 768, 1]> dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84415296))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86774656))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(6519040)))];
            tensor<fp16, [1, 768, 1]> x_91_cast_fp16 = transpose(perm = var_3622, x = x_89_cast_fp16)[name = tensor<string, []>("transpose_145")];
            tensor<fp16, [1, 3072, 1]> y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = tensor<string, []>("y_45_cast_fp16")];
            tensor<string, []> x_93_mode_0 = const()[name = tensor<string, []>("x_93_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")];
            tensor<fp16, [1, 3072, 1]> x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_45_cast_fp16)[name = tensor<string, []>("x_93_cast_fp16")];
            tensor<string, []> y_pad_type_0 = const()[name = tensor<string, []>("y_pad_type_0"), val = tensor<string, []>("valid")];
            tensor<int32, [1]> y_strides_0 = const()[name = tensor<string, []>("y_strides_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, [2]> y_pad_0 = const()[name = tensor<string, []>("y_pad_0"), val = tensor<int32, [2]>([0, 0])];
            tensor<int32, [1]> y_dilations_0 = const()[name = tensor<string, []>("y_dilations_0"), val = tensor<int32, [1]>([1])];
            tensor<int32, []> y_groups_0 = const()[name = tensor<string, []>("y_groups_0"), val = tensor<int32, []>(1)];
            tensor<fp16, [768, 3072, 1]> dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(86780864))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89140224))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3953280)))];
            tensor<fp16, [1, 768, 1]> y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = tensor<string, []>("y_cast_fp16")];
            tensor<int32, [3]> var_3640 = const()[name = tensor<string, []>("op_3640"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 768]> var_3641_cast_fp16 = transpose(perm = var_3640, x = y_cast_fp16)[name = tensor<string, []>("transpose_144")];
            tensor<fp16, [1, 1, 768]> input_171_cast_fp16 = add(x = input_169_cast_fp16, y = var_3641_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
            tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [768]> dec_norm_out_weight_to_fp16 = const()[name = tensor<string, []>("dec_norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89141824)))];
            tensor<fp16, []> var_3645_to_fp16 = const()[name = tensor<string, []>("op_3645_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
            tensor<fp16, [1, 1, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, epsilon = var_3645_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = input_171_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
            tensor<string, []> input_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("input_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp16, [16192, 768]> dec_final_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_final_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [16192, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(89143424))), scale = tensor<fp16, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101595200))), zero_point = tensor<int8, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101578944)))];
            tensor<fp16, [16192]> dec_final_proj_bias_to_fp16 = const()[name = tensor<string, []>("dec_final_proj_bias_to_fp16"), val = tensor<fp16, [16192]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(101627648)))];
            tensor<fp16, [1, 1, 16192]> linear_48_cast_fp16 = linear(bias = dec_final_proj_bias_to_fp16, weight = dec_final_proj_weight_to_fp16_quantized, x = input_cast_fp16)[name = tensor<string, []>("linear_48_cast_fp16")];
            tensor<int32, [4]> var_3658 = const()[name = tensor<string, []>("op_3658"), val = tensor<int32, [4]>([1, 1, 8, 2024])];
            tensor<fp16, [1, 1, 8, 2024]> var_3659_cast_fp16 = reshape(shape = var_3658, x = linear_48_cast_fp16)[name = tensor<string, []>("op_3659_cast_fp16")];
            tensor<string, []> var_3659_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_3659_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_0 = cast(dtype = sa_k_out_1_cast_fp16_to_fp32_dtype_0, x = sa_k_out_1_cast_fp16)[name = tensor<string, []>("cast_1")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_0 = cast(dtype = sa_v_out_1_cast_fp16_to_fp32_dtype_0, x = sa_v_out_1_cast_fp16)[name = tensor<string, []>("cast_2")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_1 = cast(dtype = sa_k_out_3_cast_fp16_to_fp32_dtype_0, x = sa_k_out_3_cast_fp16)[name = tensor<string, []>("cast_3")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_1 = cast(dtype = sa_v_out_3_cast_fp16_to_fp32_dtype_0, x = sa_v_out_3_cast_fp16)[name = tensor<string, []>("cast_4")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_2 = cast(dtype = sa_k_out_5_cast_fp16_to_fp32_dtype_0, x = sa_k_out_5_cast_fp16)[name = tensor<string, []>("cast_5")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_2 = cast(dtype = sa_v_out_5_cast_fp16_to_fp32_dtype_0, x = sa_v_out_5_cast_fp16)[name = tensor<string, []>("cast_6")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_3 = cast(dtype = sa_k_out_7_cast_fp16_to_fp32_dtype_0, x = sa_k_out_7_cast_fp16)[name = tensor<string, []>("cast_7")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_3 = cast(dtype = sa_v_out_7_cast_fp16_to_fp32_dtype_0, x = sa_v_out_7_cast_fp16)[name = tensor<string, []>("cast_8")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_4 = cast(dtype = sa_k_out_9_cast_fp16_to_fp32_dtype_0, x = sa_k_out_9_cast_fp16)[name = tensor<string, []>("cast_9")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_4 = cast(dtype = sa_v_out_9_cast_fp16_to_fp32_dtype_0, x = sa_v_out_9_cast_fp16)[name = tensor<string, []>("cast_10")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_5 = cast(dtype = sa_k_out_11_cast_fp16_to_fp32_dtype_0, x = sa_k_out_11_cast_fp16)[name = tensor<string, []>("cast_11")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_5 = cast(dtype = sa_v_out_11_cast_fp16_to_fp32_dtype_0, x = sa_v_out_11_cast_fp16)[name = tensor<string, []>("cast_12")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_6 = cast(dtype = sa_k_out_13_cast_fp16_to_fp32_dtype_0, x = sa_k_out_13_cast_fp16)[name = tensor<string, []>("cast_13")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_6 = cast(dtype = sa_v_out_13_cast_fp16_to_fp32_dtype_0, x = sa_v_out_13_cast_fp16)[name = tensor<string, []>("cast_14")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_7 = cast(dtype = sa_k_out_15_cast_fp16_to_fp32_dtype_0, x = sa_k_out_15_cast_fp16)[name = tensor<string, []>("cast_15")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_7 = cast(dtype = sa_v_out_15_cast_fp16_to_fp32_dtype_0, x = sa_v_out_15_cast_fp16)[name = tensor<string, []>("cast_16")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_8 = cast(dtype = sa_k_out_17_cast_fp16_to_fp32_dtype_0, x = sa_k_out_17_cast_fp16)[name = tensor<string, []>("cast_17")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_8 = cast(dtype = sa_v_out_17_cast_fp16_to_fp32_dtype_0, x = sa_v_out_17_cast_fp16)[name = tensor<string, []>("cast_18")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_9 = cast(dtype = sa_k_out_19_cast_fp16_to_fp32_dtype_0, x = sa_k_out_19_cast_fp16)[name = tensor<string, []>("cast_19")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_9 = cast(dtype = sa_v_out_19_cast_fp16_to_fp32_dtype_0, x = sa_v_out_19_cast_fp16)[name = tensor<string, []>("cast_20")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_10 = cast(dtype = sa_k_out_21_cast_fp16_to_fp32_dtype_0, x = sa_k_out_21_cast_fp16)[name = tensor<string, []>("cast_21")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_10 = cast(dtype = sa_v_out_21_cast_fp16_to_fp32_dtype_0, x = sa_v_out_21_cast_fp16)[name = tensor<string, []>("cast_22")];
            tensor<fp32, [1, 600, 12, 64]> sa_k_out_11 = cast(dtype = sa_k_out_cast_fp16_to_fp32_dtype_0, x = sa_k_out_cast_fp16)[name = tensor<string, []>("cast_23")];
            tensor<fp32, [1, 600, 12, 64]> sa_v_out_11 = cast(dtype = sa_v_out_cast_fp16_to_fp32_dtype_0, x = sa_v_out_cast_fp16)[name = tensor<string, []>("cast_24")];
            tensor<fp32, [1, 1, 768]> h_last = cast(dtype = input_cast_fp16_to_fp32_dtype_0, x = input_cast_fp16)[name = tensor<string, []>("cast_25")];
            tensor<fp32, [1, 1, 8, 2024]> logits = cast(dtype = var_3659_cast_fp16_to_fp32_dtype_0, x = var_3659_cast_fp16)[name = tensor<string, []>("cast_26")];
            tensor<fp32, [1, 256, 768]> encoder_output_tmp = identity(x = encoder_output)[name = tensor<string, []>("encoder_output_tmp")];
        } -> (logits, h_last, sa_k_out_0, sa_k_out_1, sa_k_out_2, sa_k_out_3, sa_k_out_4, sa_k_out_5, sa_k_out_6, sa_k_out_7, sa_k_out_8, sa_k_out_9, sa_k_out_10, sa_k_out_11, sa_v_out_0, sa_v_out_1, sa_v_out_2, sa_v_out_3, sa_v_out_4, sa_v_out_5, sa_v_out_6, sa_v_out_7, sa_v_out_8, sa_v_out_9, sa_v_out_10, sa_v_out_11);
}