program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] { func main(tensor encoder_mask, tensor encoder_output, tensor speaker_idx) { tensor baked_flat_batch_dims_0 = const()[name = tensor("baked_flat_batch_dims_0"), val = tensor(0)]; tensor baked_flat_validate_indices_0 = const()[name = tensor("baked_flat_validate_indices_0"), val = tensor(false)]; tensor dec_baked_context_embedding_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_baked_context_embedding_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), scale = tensor([0x1.a8p-4, 0x1.adcp-4, 0x1.cap-4, 0x1.c38p-4, 0x1.bdp-4]), zero_point = tensor([0, 0, 0, 0, 0])]; tensor speaker_idx_to_int16_dtype_0 = const()[name = tensor("speaker_idx_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_73_dtype_0 = const()[name = tensor("cast_73_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0 = const()[name = tensor("greater_equal_0_y_0"), val = tensor(0)]; tensor speaker_idx_to_int16 = cast(dtype = speaker_idx_to_int16_dtype_0, x = speaker_idx)[name = tensor("cast_55")]; tensor cast_73 = cast(dtype = cast_73_dtype_0, x = speaker_idx_to_int16)[name = tensor("cast_54")]; tensor greater_equal_0 = greater_equal(x = cast_73, y = greater_equal_0_y_0)[name = tensor("greater_equal_0")]; tensor slice_by_index_0 = const()[name = tensor("slice_by_index_0"), val = tensor(5)]; tensor add_0 = add(x = cast_73, y = slice_by_index_0)[name = tensor("add_0")]; tensor select_0 = select(a = cast_73, b = add_0, cond = greater_equal_0)[name = tensor("select_0")]; tensor select_0_to_int16_dtype_0 = const()[name = tensor("select_0_to_int16_dtype_0"), val = tensor("int16")]; tensor cast_0_dtype_0 = const()[name = tensor("cast_0_dtype_0"), val = tensor("int32")]; tensor greater_equal_0_y_0_1 = const()[name = tensor("greater_equal_0_y_0_1"), val = tensor(0)]; tensor select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor("cast_53")]; tensor cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor("cast_52")]; tensor greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor("greater_equal_0_1")]; tensor slice_by_index_0_1 = const()[name = tensor("slice_by_index_0_1"), val = tensor(5)]; tensor add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor("add_0_1")]; tensor select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor("select_0_1")]; tensor baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor("baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor(0)]; tensor baked_flat_cast_fp16_cast_uint16_cast_uint16 = gather(axis = baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = baked_flat_batch_dims_0, indices = select_0_1, validate_indices = baked_flat_validate_indices_0, x = dec_baked_context_embedding_weight_to_fp16_quantized)[name = tensor("baked_flat_cast_fp16_cast_uint16_cast_uint16")]; tensor var_77 = const()[name = tensor("op_77"), val = tensor([1, 110, 768])]; tensor baked_cast_fp16 = reshape(shape = var_77, x = baked_flat_cast_fp16_cast_uint16_cast_uint16)[name = tensor("baked_cast_fp16")]; tensor var_162 = const()[name = tensor("op_162"), val = tensor(1)]; tensor x_1_interleave_0 = const()[name = tensor("x_1_interleave_0"), val = tensor(false)]; tensor bos_emb_to_fp16 = const()[name = tensor("bos_emb_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(422528)))]; tensor x_1_cast_fp16 = concat(axis = var_162, interleave = x_1_interleave_0, values = (baked_cast_fp16, bos_emb_to_fp16))[name = tensor("x_1_cast_fp16")]; tensor op_173_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("op_173_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509440)))]; tensor x_3_cast_fp16 = add(x = x_1_cast_fp16, y = op_173_to_fp16_quantized)[name = tensor("x_3_cast_fp16")]; tensor var_201 = const()[name = tensor("op_201"), val = tensor(-1)]; tensor input_21_axes_0 = const()[name = tensor("input_21_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509952)))]; tensor var_199_to_fp16 = const()[name = tensor("op_199_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_21_cast_fp16 = layer_norm(axes = input_21_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = x_3_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2283456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2288128)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor("linear_0_cast_fp16")]; tensor var_220 = const()[name = tensor("op_220"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_3_cast_fp16 = reshape(shape = var_220, x = linear_0_cast_fp16)[name = tensor("qkv_3_cast_fp16")]; tensor q_1_begin_0 = const()[name = tensor("q_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_1_end_0 = const()[name = tensor("q_1_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_1_end_mask_0 = const()[name = tensor("q_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_1_squeeze_mask_0 = const()[name = tensor("q_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("q_1_cast_fp16")]; tensor new_k_1_begin_0 = const()[name = tensor("new_k_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_1_end_0 = const()[name = tensor("new_k_1_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_1_end_mask_0 = const()[name = tensor("new_k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_1_squeeze_mask_0 = const()[name = tensor("new_k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("new_k_1_cast_fp16")]; tensor new_k_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_1_begin_0 = const()[name = tensor("new_v_1_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_1_end_0 = const()[name = tensor("new_v_1_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_1_end_mask_0 = const()[name = tensor("new_v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_1_squeeze_mask_0 = const()[name = tensor("new_v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor("new_v_1_cast_fp16")]; tensor new_v_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_235 = const()[name = tensor("op_235"), val = tensor([0, 2, -3, -1])]; tensor var_237_transpose_x_0 = const()[name = tensor("op_237_transpose_x_0"), val = tensor(false)]; tensor var_237_transpose_y_0 = const()[name = tensor("op_237_transpose_y_0"), val = tensor(false)]; tensor transpose_96_perm_0 = const()[name = tensor("transpose_96_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_97_perm_0 = const()[name = tensor("transpose_97_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_97 = transpose(perm = transpose_97_perm_0, x = new_k_1_cast_fp16)[name = tensor("transpose_262")]; tensor transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = tensor("transpose_263")]; tensor var_237_cast_fp16 = matmul(transpose_x = var_237_transpose_x_0, transpose_y = var_237_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor("op_237_cast_fp16")]; tensor var_238_to_fp16 = const()[name = tensor("op_238_to_fp16"), val = tensor(0x1p-3)]; tensor scores_1_cast_fp16 = mul(x = var_237_cast_fp16, y = var_238_to_fp16)[name = tensor("scores_1_cast_fp16")]; tensor var_196_to_fp16 = const()[name = tensor("op_196_to_fp16"), val = tensor(-inf)]; tensor scores_3_cast_fp16_x_0 = const()[name = tensor("scores_3_cast_fp16_x_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2292800)))]; tensor scores_3_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_1_cast_fp16)[name = tensor("scores_3_cast_fp16")]; tensor probs_1_cast_fp16 = softmax(axis = var_201, x = scores_3_cast_fp16)[name = tensor("probs_1_cast_fp16")]; tensor var_258_transpose_x_0 = const()[name = tensor("op_258_transpose_x_0"), val = tensor(false)]; tensor var_258_transpose_y_0 = const()[name = tensor("op_258_transpose_y_0"), val = tensor(false)]; tensor vT_1_cast_fp16 = transpose(perm = var_235, x = new_v_1_cast_fp16)[name = tensor("transpose_261")]; tensor var_258_cast_fp16 = matmul(transpose_x = var_258_transpose_x_0, transpose_y = var_258_transpose_y_0, x = probs_1_cast_fp16, y = vT_1_cast_fp16)[name = tensor("op_258_cast_fp16")]; tensor var_259 = const()[name = tensor("op_259"), val = tensor([0, 2, 1, 3])]; tensor var_263 = const()[name = tensor("op_263"), val = tensor([1, 111, -1])]; tensor y_1_cast_fp16 = transpose(perm = var_259, x = var_258_cast_fp16)[name = tensor("transpose_260")]; tensor input_23_cast_fp16 = reshape(shape = var_263, x = y_1_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2317568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2908288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2909888)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = tensor("linear_1_cast_fp16")]; tensor input_25_cast_fp16 = add(x = x_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor("input_25_cast_fp16")]; tensor x_5_axes_0 = const()[name = tensor("x_5_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2911488)))]; tensor x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("x_5_cast_fp16")]; tensor memory_1_axes_0 = const()[name = tensor("memory_1_axes_0"), val = tensor([-1])]; tensor encoder_output_to_fp16_dtype_0 = const()[name = tensor("encoder_output_to_fp16_dtype_0"), val = tensor("fp16")]; tensor dec_layers_0_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2913088)))]; tensor encoder_output_to_fp16 = cast(dtype = encoder_output_to_fp16_dtype_0, x = encoder_output)[name = tensor("cast_51")]; tensor memory_1_cast_fp16 = layer_norm(axes = memory_1_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_1_cast_fp16")]; tensor dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2914688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_2_bias_0_to_fp16 = const()[name = tensor("linear_2_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013568)))]; tensor linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = tensor("linear_2_cast_fp16")]; tensor var_285 = const()[name = tensor("op_285"), val = tensor([1, 111, 1, 128])]; tensor q_5_cast_fp16 = reshape(shape = var_285, x = linear_2_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_3_bias_0_to_fp16 = const()[name = tensor("linear_3_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3211456)))]; tensor linear_3_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_1_cast_fp16)[name = tensor("linear_3_cast_fp16")]; tensor var_291 = const()[name = tensor("op_291"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_1_cast_fp16 = reshape(shape = var_291, x = linear_3_cast_fp16)[name = tensor("kv_1_cast_fp16")]; tensor k_1_begin_0 = const()[name = tensor("k_1_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_1_end_0 = const()[name = tensor("k_1_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_1_end_mask_0 = const()[name = tensor("k_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_1_squeeze_mask_0 = const()[name = tensor("k_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_1_cast_fp16 = slice_by_index(begin = k_1_begin_0, end = k_1_end_0, end_mask = k_1_end_mask_0, squeeze_mask = k_1_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; tensor k_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_1_begin_0 = const()[name = tensor("v_1_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_1_end_0 = const()[name = tensor("v_1_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_1_end_mask_0 = const()[name = tensor("v_1_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_1_squeeze_mask_0 = const()[name = tensor("v_1_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_1_cast_fp16 = slice_by_index(begin = v_1_begin_0, end = v_1_end_0, end_mask = v_1_end_mask_0, squeeze_mask = v_1_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor v_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_1_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_304 = const()[name = tensor("op_304"), val = tensor([0, 2, -3, -1])]; tensor var_306_transpose_x_0 = const()[name = tensor("op_306_transpose_x_0"), val = tensor(false)]; tensor var_306_transpose_y_0 = const()[name = tensor("op_306_transpose_y_0"), val = tensor(false)]; tensor transpose_98_perm_0 = const()[name = tensor("transpose_98_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_99_perm_0 = const()[name = tensor("transpose_99_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_99 = transpose(perm = transpose_99_perm_0, x = k_1_cast_fp16)[name = tensor("transpose_258")]; tensor transpose_98 = transpose(perm = transpose_98_perm_0, x = q_5_cast_fp16)[name = tensor("transpose_259")]; tensor var_306_cast_fp16 = matmul(transpose_x = var_306_transpose_x_0, transpose_y = var_306_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor("op_306_cast_fp16")]; tensor var_307_to_fp16 = const()[name = tensor("op_307_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_7_cast_fp16 = mul(x = var_306_cast_fp16, y = var_307_to_fp16)[name = tensor("scores_7_cast_fp16")]; tensor var_310_axes_0 = const()[name = tensor("op_310_axes_0"), val = tensor([1])]; tensor encoder_mask_to_fp16_dtype_0 = const()[name = tensor("encoder_mask_to_fp16_dtype_0"), val = tensor("fp16")]; tensor encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = tensor("cast_50")]; tensor var_310_cast_fp16 = expand_dims(axes = var_310_axes_0, x = encoder_mask_to_fp16)[name = tensor("op_310_cast_fp16")]; tensor var_311_axes_0 = const()[name = tensor("op_311_axes_0"), val = tensor([2])]; tensor var_311_cast_fp16 = expand_dims(axes = var_311_axes_0, x = var_310_cast_fp16)[name = tensor("op_311_cast_fp16")]; tensor var_186_promoted_1_to_fp16 = const()[name = tensor("op_186_promoted_1_to_fp16"), val = tensor(0x0p+0)]; tensor var_313_cast_fp16 = equal(x = var_311_cast_fp16, y = var_186_promoted_1_to_fp16)[name = tensor("op_313_cast_fp16")]; tensor scores_9_cast_fp16 = select(a = var_196_to_fp16, b = scores_7_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_9_cast_fp16")]; tensor probs_3_cast_fp16 = softmax(axis = var_201, x = scores_9_cast_fp16)[name = tensor("probs_3_cast_fp16")]; tensor var_316_transpose_x_0 = const()[name = tensor("op_316_transpose_x_0"), val = tensor(false)]; tensor var_316_transpose_y_0 = const()[name = tensor("op_316_transpose_y_0"), val = tensor(false)]; tensor vT_3_cast_fp16 = transpose(perm = var_304, x = v_1_cast_fp16)[name = tensor("transpose_257")]; tensor var_316_cast_fp16 = matmul(transpose_x = var_316_transpose_x_0, transpose_y = var_316_transpose_y_0, x = probs_3_cast_fp16, y = vT_3_cast_fp16)[name = tensor("op_316_cast_fp16")]; tensor var_317 = const()[name = tensor("op_317"), val = tensor([0, 2, 1, 3])]; tensor var_319 = const()[name = tensor("op_319"), val = tensor([1, 111, -1])]; tensor var_318_cast_fp16 = transpose(perm = var_317, x = var_316_cast_fp16)[name = tensor("transpose_256")]; tensor input_27_cast_fp16 = reshape(shape = var_319, x = var_318_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3212032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3310400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_4_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor("linear_4_cast_fp16")]; tensor input_29_cast_fp16 = add(x = input_25_cast_fp16, y = linear_4_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor x_7_axes_0 = const()[name = tensor("x_7_axes_0"), val = tensor([-1])]; tensor dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3312000)))]; tensor x_7_cast_fp16 = layer_norm(axes = x_7_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("x_7_cast_fp16")]; tensor var_336 = const()[name = tensor("op_336"), val = tensor([0, 2, 1])]; tensor y_3_pad_type_0 = const()[name = tensor("y_3_pad_type_0"), val = tensor("valid")]; tensor y_3_strides_0 = const()[name = tensor("y_3_strides_0"), val = tensor([1])]; tensor y_3_pad_0 = const()[name = tensor("y_3_pad_0"), val = tensor([0, 0])]; tensor y_3_dilations_0 = const()[name = tensor("y_3_dilations_0"), val = tensor([1])]; tensor y_3_groups_0 = const()[name = tensor("y_3_groups_0"), val = tensor(1)]; tensor dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3313600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5676096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_11_cast_fp16 = transpose(perm = var_336, x = x_7_cast_fp16)[name = tensor("transpose_255")]; tensor y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor("y_3_cast_fp16")]; tensor x_13_mode_0 = const()[name = tensor("x_13_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_3_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor y_5_pad_type_0 = const()[name = tensor("y_5_pad_type_0"), val = tensor("valid")]; tensor y_5_strides_0 = const()[name = tensor("y_5_strides_0"), val = tensor([1])]; tensor y_5_pad_0 = const()[name = tensor("y_5_pad_0"), val = tensor([0, 0])]; tensor y_5_dilations_0 = const()[name = tensor("y_5_dilations_0"), val = tensor([1])]; tensor y_5_groups_0 = const()[name = tensor("y_5_groups_0"), val = tensor(1)]; tensor dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5682304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8041664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = tensor("y_5_cast_fp16")]; tensor var_356 = const()[name = tensor("op_356"), val = tensor([0, 2, 1])]; tensor h_9_cast_fp16 = transpose(perm = var_356, x = y_5_cast_fp16)[name = tensor("transpose_254")]; tensor x_17_cast_fp16 = add(x = input_29_cast_fp16, y = h_9_cast_fp16)[name = tensor("x_17_cast_fp16")]; tensor var_386 = const()[name = tensor("op_386"), val = tensor(-1)]; tensor input_33_axes_0 = const()[name = tensor("input_33_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8043264)))]; tensor var_384_to_fp16 = const()[name = tensor("op_384_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = x_17_cast_fp16)[name = tensor("input_33_cast_fp16")]; tensor dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8044864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9814400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor("linear_5_cast_fp16")]; tensor var_405 = const()[name = tensor("op_405"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_7_cast_fp16 = reshape(shape = var_405, x = linear_5_cast_fp16)[name = tensor("qkv_7_cast_fp16")]; tensor q_9_begin_0 = const()[name = tensor("q_9_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_9_end_0 = const()[name = tensor("q_9_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_9_end_mask_0 = const()[name = tensor("q_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_9_squeeze_mask_0 = const()[name = tensor("q_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor new_k_3_begin_0 = const()[name = tensor("new_k_3_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_3_end_0 = const()[name = tensor("new_k_3_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_3_end_mask_0 = const()[name = tensor("new_k_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_3_squeeze_mask_0 = const()[name = tensor("new_k_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("new_k_3_cast_fp16")]; tensor new_k_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_3_begin_0 = const()[name = tensor("new_v_3_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_3_end_0 = const()[name = tensor("new_v_3_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_3_end_mask_0 = const()[name = tensor("new_v_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_3_squeeze_mask_0 = const()[name = tensor("new_v_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor("new_v_3_cast_fp16")]; tensor new_v_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_420 = const()[name = tensor("op_420"), val = tensor([0, 2, -3, -1])]; tensor var_422_transpose_x_0 = const()[name = tensor("op_422_transpose_x_0"), val = tensor(false)]; tensor var_422_transpose_y_0 = const()[name = tensor("op_422_transpose_y_0"), val = tensor(false)]; tensor transpose_100_perm_0 = const()[name = tensor("transpose_100_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_101_perm_0 = const()[name = tensor("transpose_101_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_101 = transpose(perm = transpose_101_perm_0, x = new_k_3_cast_fp16)[name = tensor("transpose_252")]; tensor transpose_100 = transpose(perm = transpose_100_perm_0, x = q_9_cast_fp16)[name = tensor("transpose_253")]; tensor var_422_cast_fp16 = matmul(transpose_x = var_422_transpose_x_0, transpose_y = var_422_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor("op_422_cast_fp16")]; tensor var_423_to_fp16 = const()[name = tensor("op_423_to_fp16"), val = tensor(0x1p-3)]; tensor scores_11_cast_fp16 = mul(x = var_422_cast_fp16, y = var_423_to_fp16)[name = tensor("scores_11_cast_fp16")]; tensor var_381_to_fp16 = const()[name = tensor("op_381_to_fp16"), val = tensor(-inf)]; tensor scores_13_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_11_cast_fp16)[name = tensor("scores_13_cast_fp16")]; tensor probs_5_cast_fp16 = softmax(axis = var_386, x = scores_13_cast_fp16)[name = tensor("probs_5_cast_fp16")]; tensor var_443_transpose_x_0 = const()[name = tensor("op_443_transpose_x_0"), val = tensor(false)]; tensor var_443_transpose_y_0 = const()[name = tensor("op_443_transpose_y_0"), val = tensor(false)]; tensor vT_5_cast_fp16 = transpose(perm = var_420, x = new_v_3_cast_fp16)[name = tensor("transpose_251")]; tensor var_443_cast_fp16 = matmul(transpose_x = var_443_transpose_x_0, transpose_y = var_443_transpose_y_0, x = probs_5_cast_fp16, y = vT_5_cast_fp16)[name = tensor("op_443_cast_fp16")]; tensor var_444 = const()[name = tensor("op_444"), val = tensor([0, 2, 1, 3])]; tensor var_448 = const()[name = tensor("op_448"), val = tensor([1, 111, -1])]; tensor y_7_cast_fp16 = transpose(perm = var_444, x = var_443_cast_fp16)[name = tensor("transpose_250")]; tensor input_35_cast_fp16 = reshape(shape = var_448, x = y_7_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9819072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10408960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor("linear_6_cast_fp16")]; tensor input_37_cast_fp16 = add(x = x_17_cast_fp16, y = linear_6_cast_fp16)[name = tensor("input_37_cast_fp16")]; tensor x_21_axes_0 = const()[name = tensor("x_21_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10410560)))]; tensor x_21_cast_fp16 = layer_norm(axes = x_21_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = tensor("x_21_cast_fp16")]; tensor memory_3_axes_0 = const()[name = tensor("memory_3_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10412160)))]; tensor memory_3_cast_fp16 = layer_norm(axes = memory_3_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_3_cast_fp16")]; tensor dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10413760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10512128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor var_470 = const()[name = tensor("op_470"), val = tensor([1, 111, 1, 128])]; tensor q_13_cast_fp16 = reshape(shape = var_470, x = linear_7_cast_fp16)[name = tensor("q_13_cast_fp16")]; tensor dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10512448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10709120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_8_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_3_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor var_476 = const()[name = tensor("op_476"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_3_cast_fp16 = reshape(shape = var_476, x = linear_8_cast_fp16)[name = tensor("kv_3_cast_fp16")]; tensor k_3_begin_0 = const()[name = tensor("k_3_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_3_end_0 = const()[name = tensor("k_3_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_3_end_mask_0 = const()[name = tensor("k_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_3_squeeze_mask_0 = const()[name = tensor("k_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_3_cast_fp16 = slice_by_index(begin = k_3_begin_0, end = k_3_end_0, end_mask = k_3_end_mask_0, squeeze_mask = k_3_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor("k_3_cast_fp16")]; tensor k_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_3_begin_0 = const()[name = tensor("v_3_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_3_end_0 = const()[name = tensor("v_3_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_3_end_mask_0 = const()[name = tensor("v_3_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_3_squeeze_mask_0 = const()[name = tensor("v_3_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_3_cast_fp16 = slice_by_index(begin = v_3_begin_0, end = v_3_end_0, end_mask = v_3_end_mask_0, squeeze_mask = v_3_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor v_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_3_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_489 = const()[name = tensor("op_489"), val = tensor([0, 2, -3, -1])]; tensor var_491_transpose_x_0 = const()[name = tensor("op_491_transpose_x_0"), val = tensor(false)]; tensor var_491_transpose_y_0 = const()[name = tensor("op_491_transpose_y_0"), val = tensor(false)]; tensor transpose_102_perm_0 = const()[name = tensor("transpose_102_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_103_perm_0 = const()[name = tensor("transpose_103_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_103 = transpose(perm = transpose_103_perm_0, x = k_3_cast_fp16)[name = tensor("transpose_248")]; tensor transpose_102 = transpose(perm = transpose_102_perm_0, x = q_13_cast_fp16)[name = tensor("transpose_249")]; tensor var_491_cast_fp16 = matmul(transpose_x = var_491_transpose_x_0, transpose_y = var_491_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor("op_491_cast_fp16")]; tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_17_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("scores_17_cast_fp16")]; tensor scores_19_cast_fp16 = select(a = var_381_to_fp16, b = scores_17_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_19_cast_fp16")]; tensor probs_7_cast_fp16 = softmax(axis = var_386, x = scores_19_cast_fp16)[name = tensor("probs_7_cast_fp16")]; tensor var_501_transpose_x_0 = const()[name = tensor("op_501_transpose_x_0"), val = tensor(false)]; tensor var_501_transpose_y_0 = const()[name = tensor("op_501_transpose_y_0"), val = tensor(false)]; tensor vT_7_cast_fp16 = transpose(perm = var_489, x = v_3_cast_fp16)[name = tensor("transpose_247")]; tensor var_501_cast_fp16 = matmul(transpose_x = var_501_transpose_x_0, transpose_y = var_501_transpose_y_0, x = probs_7_cast_fp16, y = vT_7_cast_fp16)[name = tensor("op_501_cast_fp16")]; tensor var_502 = const()[name = tensor("op_502"), val = tensor([0, 2, 1, 3])]; tensor var_504 = const()[name = tensor("op_504"), val = tensor([1, 111, -1])]; tensor var_503_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = tensor("transpose_246")]; tensor input_39_cast_fp16 = reshape(shape = var_504, x = var_503_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10709696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10808064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor input_41_cast_fp16 = add(x = input_37_cast_fp16, y = linear_9_cast_fp16)[name = tensor("input_41_cast_fp16")]; tensor x_23_axes_0 = const()[name = tensor("x_23_axes_0"), val = tensor([-1])]; tensor dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10809664)))]; tensor x_23_cast_fp16 = layer_norm(axes = x_23_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("x_23_cast_fp16")]; tensor var_521 = const()[name = tensor("op_521"), val = tensor([0, 2, 1])]; tensor y_9_pad_type_0 = const()[name = tensor("y_9_pad_type_0"), val = tensor("valid")]; tensor y_9_strides_0 = const()[name = tensor("y_9_strides_0"), val = tensor([1])]; tensor y_9_pad_0 = const()[name = tensor("y_9_pad_0"), val = tensor([0, 0])]; tensor y_9_dilations_0 = const()[name = tensor("y_9_dilations_0"), val = tensor([1])]; tensor y_9_groups_0 = const()[name = tensor("y_9_groups_0"), val = tensor(1)]; tensor dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10811264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13170624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_27_cast_fp16 = transpose(perm = var_521, x = x_23_cast_fp16)[name = tensor("transpose_245")]; tensor y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = tensor("y_9_cast_fp16")]; tensor x_29_mode_0 = const()[name = tensor("x_29_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_9_cast_fp16)[name = tensor("x_29_cast_fp16")]; tensor y_11_pad_type_0 = const()[name = tensor("y_11_pad_type_0"), val = tensor("valid")]; tensor y_11_strides_0 = const()[name = tensor("y_11_strides_0"), val = tensor([1])]; tensor y_11_pad_0 = const()[name = tensor("y_11_pad_0"), val = tensor([0, 0])]; tensor y_11_dilations_0 = const()[name = tensor("y_11_dilations_0"), val = tensor([1])]; tensor y_11_groups_0 = const()[name = tensor("y_11_groups_0"), val = tensor(1)]; tensor dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13176832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15536192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = tensor("y_11_cast_fp16")]; tensor var_541 = const()[name = tensor("op_541"), val = tensor([0, 2, 1])]; tensor h_19_cast_fp16 = transpose(perm = var_541, x = y_11_cast_fp16)[name = tensor("transpose_244")]; tensor x_33_cast_fp16 = add(x = input_41_cast_fp16, y = h_19_cast_fp16)[name = tensor("x_33_cast_fp16")]; tensor var_571 = const()[name = tensor("op_571"), val = tensor(-1)]; tensor input_45_axes_0 = const()[name = tensor("input_45_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15537792)))]; tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_45_cast_fp16 = layer_norm(axes = input_45_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = x_33_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15539392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17308928))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_45_cast_fp16)[name = tensor("linear_10_cast_fp16")]; tensor var_590 = const()[name = tensor("op_590"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_11_cast_fp16 = reshape(shape = var_590, x = linear_10_cast_fp16)[name = tensor("qkv_11_cast_fp16")]; tensor q_17_begin_0 = const()[name = tensor("q_17_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_17_end_0 = const()[name = tensor("q_17_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_17_end_mask_0 = const()[name = tensor("q_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_17_squeeze_mask_0 = const()[name = tensor("q_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("q_17_cast_fp16")]; tensor new_k_5_begin_0 = const()[name = tensor("new_k_5_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_5_end_0 = const()[name = tensor("new_k_5_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_5_end_mask_0 = const()[name = tensor("new_k_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_5_squeeze_mask_0 = const()[name = tensor("new_k_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("new_k_5_cast_fp16")]; tensor new_k_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_5_begin_0 = const()[name = tensor("new_v_5_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_5_end_0 = const()[name = tensor("new_v_5_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_5_end_mask_0 = const()[name = tensor("new_v_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_5_squeeze_mask_0 = const()[name = tensor("new_v_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor("new_v_5_cast_fp16")]; tensor new_v_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_605 = const()[name = tensor("op_605"), val = tensor([0, 2, -3, -1])]; tensor var_607_transpose_x_0 = const()[name = tensor("op_607_transpose_x_0"), val = tensor(false)]; tensor var_607_transpose_y_0 = const()[name = tensor("op_607_transpose_y_0"), val = tensor(false)]; tensor transpose_104_perm_0 = const()[name = tensor("transpose_104_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_105_perm_0 = const()[name = tensor("transpose_105_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_105 = transpose(perm = transpose_105_perm_0, x = new_k_5_cast_fp16)[name = tensor("transpose_242")]; tensor transpose_104 = transpose(perm = transpose_104_perm_0, x = q_17_cast_fp16)[name = tensor("transpose_243")]; tensor var_607_cast_fp16 = matmul(transpose_x = var_607_transpose_x_0, transpose_y = var_607_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor("op_607_cast_fp16")]; tensor var_608_to_fp16 = const()[name = tensor("op_608_to_fp16"), val = tensor(0x1p-3)]; tensor scores_21_cast_fp16 = mul(x = var_607_cast_fp16, y = var_608_to_fp16)[name = tensor("scores_21_cast_fp16")]; tensor var_566_to_fp16 = const()[name = tensor("op_566_to_fp16"), val = tensor(-inf)]; tensor scores_23_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_21_cast_fp16)[name = tensor("scores_23_cast_fp16")]; tensor probs_9_cast_fp16 = softmax(axis = var_571, x = scores_23_cast_fp16)[name = tensor("probs_9_cast_fp16")]; tensor var_628_transpose_x_0 = const()[name = tensor("op_628_transpose_x_0"), val = tensor(false)]; tensor var_628_transpose_y_0 = const()[name = tensor("op_628_transpose_y_0"), val = tensor(false)]; tensor vT_9_cast_fp16 = transpose(perm = var_605, x = new_v_5_cast_fp16)[name = tensor("transpose_241")]; tensor var_628_cast_fp16 = matmul(transpose_x = var_628_transpose_x_0, transpose_y = var_628_transpose_y_0, x = probs_9_cast_fp16, y = vT_9_cast_fp16)[name = tensor("op_628_cast_fp16")]; tensor var_629 = const()[name = tensor("op_629"), val = tensor([0, 2, 1, 3])]; tensor var_633 = const()[name = tensor("op_633"), val = tensor([1, 111, -1])]; tensor y_13_cast_fp16 = transpose(perm = var_629, x = var_628_cast_fp16)[name = tensor("transpose_240")]; tensor input_47_cast_fp16 = reshape(shape = var_633, x = y_13_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17313600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17903488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor input_49_cast_fp16 = add(x = x_33_cast_fp16, y = linear_11_cast_fp16)[name = tensor("input_49_cast_fp16")]; tensor x_37_axes_0 = const()[name = tensor("x_37_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17905088)))]; tensor x_37_cast_fp16 = layer_norm(axes = x_37_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("x_37_cast_fp16")]; tensor memory_5_axes_0 = const()[name = tensor("memory_5_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17906688)))]; tensor memory_5_cast_fp16 = layer_norm(axes = memory_5_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_5_cast_fp16")]; tensor dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17908288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18006656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_655 = const()[name = tensor("op_655"), val = tensor([1, 111, 1, 128])]; tensor q_21_cast_fp16 = reshape(shape = var_655, x = linear_12_cast_fp16)[name = tensor("q_21_cast_fp16")]; tensor dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18006976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18203648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_13_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_5_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_661 = const()[name = tensor("op_661"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_5_cast_fp16 = reshape(shape = var_661, x = linear_13_cast_fp16)[name = tensor("kv_5_cast_fp16")]; tensor k_5_begin_0 = const()[name = tensor("k_5_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_5_end_0 = const()[name = tensor("k_5_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_5_end_mask_0 = const()[name = tensor("k_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_5_squeeze_mask_0 = const()[name = tensor("k_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_5_cast_fp16 = slice_by_index(begin = k_5_begin_0, end = k_5_end_0, end_mask = k_5_end_mask_0, squeeze_mask = k_5_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor("k_5_cast_fp16")]; tensor k_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_5_begin_0 = const()[name = tensor("v_5_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_5_end_0 = const()[name = tensor("v_5_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_5_end_mask_0 = const()[name = tensor("v_5_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_5_squeeze_mask_0 = const()[name = tensor("v_5_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_5_cast_fp16 = slice_by_index(begin = v_5_begin_0, end = v_5_end_0, end_mask = v_5_end_mask_0, squeeze_mask = v_5_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor v_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_5_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_674 = const()[name = tensor("op_674"), val = tensor([0, 2, -3, -1])]; tensor var_676_transpose_x_0 = const()[name = tensor("op_676_transpose_x_0"), val = tensor(false)]; tensor var_676_transpose_y_0 = const()[name = tensor("op_676_transpose_y_0"), val = tensor(false)]; tensor transpose_106_perm_0 = const()[name = tensor("transpose_106_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_107_perm_0 = const()[name = tensor("transpose_107_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_107 = transpose(perm = transpose_107_perm_0, x = k_5_cast_fp16)[name = tensor("transpose_238")]; tensor transpose_106 = transpose(perm = transpose_106_perm_0, x = q_21_cast_fp16)[name = tensor("transpose_239")]; tensor var_676_cast_fp16 = matmul(transpose_x = var_676_transpose_x_0, transpose_y = var_676_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor("op_676_cast_fp16")]; tensor var_677_to_fp16 = const()[name = tensor("op_677_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_27_cast_fp16 = mul(x = var_676_cast_fp16, y = var_677_to_fp16)[name = tensor("scores_27_cast_fp16")]; tensor scores_29_cast_fp16 = select(a = var_566_to_fp16, b = scores_27_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_29_cast_fp16")]; tensor probs_11_cast_fp16 = softmax(axis = var_571, x = scores_29_cast_fp16)[name = tensor("probs_11_cast_fp16")]; tensor var_686_transpose_x_0 = const()[name = tensor("op_686_transpose_x_0"), val = tensor(false)]; tensor var_686_transpose_y_0 = const()[name = tensor("op_686_transpose_y_0"), val = tensor(false)]; tensor vT_11_cast_fp16 = transpose(perm = var_674, x = v_5_cast_fp16)[name = tensor("transpose_237")]; tensor var_686_cast_fp16 = matmul(transpose_x = var_686_transpose_x_0, transpose_y = var_686_transpose_y_0, x = probs_11_cast_fp16, y = vT_11_cast_fp16)[name = tensor("op_686_cast_fp16")]; tensor var_687 = const()[name = tensor("op_687"), val = tensor([0, 2, 1, 3])]; tensor var_689 = const()[name = tensor("op_689"), val = tensor([1, 111, -1])]; tensor var_688_cast_fp16 = transpose(perm = var_687, x = var_686_cast_fp16)[name = tensor("transpose_236")]; tensor input_51_cast_fp16 = reshape(shape = var_689, x = var_688_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18204224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18302592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_14_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor input_53_cast_fp16 = add(x = input_49_cast_fp16, y = linear_14_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor x_39_axes_0 = const()[name = tensor("x_39_axes_0"), val = tensor([-1])]; tensor dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18304192)))]; tensor x_39_cast_fp16 = layer_norm(axes = x_39_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("x_39_cast_fp16")]; tensor var_706 = const()[name = tensor("op_706"), val = tensor([0, 2, 1])]; tensor y_15_pad_type_0 = const()[name = tensor("y_15_pad_type_0"), val = tensor("valid")]; tensor y_15_strides_0 = const()[name = tensor("y_15_strides_0"), val = tensor([1])]; tensor y_15_pad_0 = const()[name = tensor("y_15_pad_0"), val = tensor([0, 0])]; tensor y_15_dilations_0 = const()[name = tensor("y_15_dilations_0"), val = tensor([1])]; tensor y_15_groups_0 = const()[name = tensor("y_15_groups_0"), val = tensor(1)]; tensor dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18305792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20665152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_43_cast_fp16 = transpose(perm = var_706, x = x_39_cast_fp16)[name = tensor("transpose_235")]; tensor y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = tensor("y_15_cast_fp16")]; tensor x_45_mode_0 = const()[name = tensor("x_45_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_15_cast_fp16)[name = tensor("x_45_cast_fp16")]; tensor y_17_pad_type_0 = const()[name = tensor("y_17_pad_type_0"), val = tensor("valid")]; tensor y_17_strides_0 = const()[name = tensor("y_17_strides_0"), val = tensor([1])]; tensor y_17_pad_0 = const()[name = tensor("y_17_pad_0"), val = tensor([0, 0])]; tensor y_17_dilations_0 = const()[name = tensor("y_17_dilations_0"), val = tensor([1])]; tensor y_17_groups_0 = const()[name = tensor("y_17_groups_0"), val = tensor(1)]; tensor dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20671360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23030720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = tensor("y_17_cast_fp16")]; tensor var_726 = const()[name = tensor("op_726"), val = tensor([0, 2, 1])]; tensor h_29_cast_fp16 = transpose(perm = var_726, x = y_17_cast_fp16)[name = tensor("transpose_234")]; tensor x_49_cast_fp16 = add(x = input_53_cast_fp16, y = h_29_cast_fp16)[name = tensor("x_49_cast_fp16")]; tensor var_756 = const()[name = tensor("op_756"), val = tensor(-1)]; tensor input_57_axes_0 = const()[name = tensor("input_57_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23032320)))]; tensor var_754_to_fp16 = const()[name = tensor("op_754_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = x_49_cast_fp16)[name = tensor("input_57_cast_fp16")]; tensor dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23033920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24803456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = tensor("linear_15_cast_fp16")]; tensor var_775 = const()[name = tensor("op_775"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_15_cast_fp16 = reshape(shape = var_775, x = linear_15_cast_fp16)[name = tensor("qkv_15_cast_fp16")]; tensor q_25_begin_0 = const()[name = tensor("q_25_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_25_end_0 = const()[name = tensor("q_25_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_25_end_mask_0 = const()[name = tensor("q_25_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_25_squeeze_mask_0 = const()[name = tensor("q_25_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_25_cast_fp16 = slice_by_index(begin = q_25_begin_0, end = q_25_end_0, end_mask = q_25_end_mask_0, squeeze_mask = q_25_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("q_25_cast_fp16")]; tensor new_k_7_begin_0 = const()[name = tensor("new_k_7_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_7_end_0 = const()[name = tensor("new_k_7_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_7_end_mask_0 = const()[name = tensor("new_k_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_7_squeeze_mask_0 = const()[name = tensor("new_k_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("new_k_7_cast_fp16")]; tensor new_k_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_7_begin_0 = const()[name = tensor("new_v_7_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_7_end_0 = const()[name = tensor("new_v_7_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_7_end_mask_0 = const()[name = tensor("new_v_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_7_squeeze_mask_0 = const()[name = tensor("new_v_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor("new_v_7_cast_fp16")]; tensor new_v_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_790 = const()[name = tensor("op_790"), val = tensor([0, 2, -3, -1])]; tensor var_792_transpose_x_0 = const()[name = tensor("op_792_transpose_x_0"), val = tensor(false)]; tensor var_792_transpose_y_0 = const()[name = tensor("op_792_transpose_y_0"), val = tensor(false)]; tensor transpose_108_perm_0 = const()[name = tensor("transpose_108_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_109_perm_0 = const()[name = tensor("transpose_109_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_109 = transpose(perm = transpose_109_perm_0, x = new_k_7_cast_fp16)[name = tensor("transpose_232")]; tensor transpose_108 = transpose(perm = transpose_108_perm_0, x = q_25_cast_fp16)[name = tensor("transpose_233")]; tensor var_792_cast_fp16 = matmul(transpose_x = var_792_transpose_x_0, transpose_y = var_792_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor("op_792_cast_fp16")]; tensor var_793_to_fp16 = const()[name = tensor("op_793_to_fp16"), val = tensor(0x1p-3)]; tensor scores_31_cast_fp16 = mul(x = var_792_cast_fp16, y = var_793_to_fp16)[name = tensor("scores_31_cast_fp16")]; tensor var_751_to_fp16 = const()[name = tensor("op_751_to_fp16"), val = tensor(-inf)]; tensor scores_33_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_31_cast_fp16)[name = tensor("scores_33_cast_fp16")]; tensor probs_13_cast_fp16 = softmax(axis = var_756, x = scores_33_cast_fp16)[name = tensor("probs_13_cast_fp16")]; tensor var_813_transpose_x_0 = const()[name = tensor("op_813_transpose_x_0"), val = tensor(false)]; tensor var_813_transpose_y_0 = const()[name = tensor("op_813_transpose_y_0"), val = tensor(false)]; tensor vT_13_cast_fp16 = transpose(perm = var_790, x = new_v_7_cast_fp16)[name = tensor("transpose_231")]; tensor var_813_cast_fp16 = matmul(transpose_x = var_813_transpose_x_0, transpose_y = var_813_transpose_y_0, x = probs_13_cast_fp16, y = vT_13_cast_fp16)[name = tensor("op_813_cast_fp16")]; tensor var_814 = const()[name = tensor("op_814"), val = tensor([0, 2, 1, 3])]; tensor var_818 = const()[name = tensor("op_818"), val = tensor([1, 111, -1])]; tensor y_19_cast_fp16 = transpose(perm = var_814, x = var_813_cast_fp16)[name = tensor("transpose_230")]; tensor input_59_cast_fp16 = reshape(shape = var_818, x = y_19_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24808128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25398016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor input_61_cast_fp16 = add(x = x_49_cast_fp16, y = linear_16_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor x_53_axes_0 = const()[name = tensor("x_53_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25399616)))]; tensor x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("x_53_cast_fp16")]; tensor memory_7_axes_0 = const()[name = tensor("memory_7_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25401216)))]; tensor memory_7_cast_fp16 = layer_norm(axes = memory_7_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_7_cast_fp16")]; tensor dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25402816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25501184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_17_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 111, 1, 128])]; tensor q_29_cast_fp16 = reshape(shape = var_840, x = linear_17_cast_fp16)[name = tensor("q_29_cast_fp16")]; tensor dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25501504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25698176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_18_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_7_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_7_cast_fp16 = reshape(shape = var_846, x = linear_18_cast_fp16)[name = tensor("kv_7_cast_fp16")]; tensor k_7_begin_0 = const()[name = tensor("k_7_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_7_end_0 = const()[name = tensor("k_7_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_7_end_mask_0 = const()[name = tensor("k_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_7_squeeze_mask_0 = const()[name = tensor("k_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_7_cast_fp16 = slice_by_index(begin = k_7_begin_0, end = k_7_end_0, end_mask = k_7_end_mask_0, squeeze_mask = k_7_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor("k_7_cast_fp16")]; tensor k_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_7_begin_0 = const()[name = tensor("v_7_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_7_end_0 = const()[name = tensor("v_7_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_7_end_mask_0 = const()[name = tensor("v_7_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_7_squeeze_mask_0 = const()[name = tensor("v_7_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_7_cast_fp16 = slice_by_index(begin = v_7_begin_0, end = v_7_end_0, end_mask = v_7_end_mask_0, squeeze_mask = v_7_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor("v_7_cast_fp16")]; tensor v_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_7_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_859 = const()[name = tensor("op_859"), val = tensor([0, 2, -3, -1])]; tensor var_861_transpose_x_0 = const()[name = tensor("op_861_transpose_x_0"), val = tensor(false)]; tensor var_861_transpose_y_0 = const()[name = tensor("op_861_transpose_y_0"), val = tensor(false)]; tensor transpose_110_perm_0 = const()[name = tensor("transpose_110_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_111_perm_0 = const()[name = tensor("transpose_111_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_111 = transpose(perm = transpose_111_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_228")]; tensor transpose_110 = transpose(perm = transpose_110_perm_0, x = q_29_cast_fp16)[name = tensor("transpose_229")]; tensor var_861_cast_fp16 = matmul(transpose_x = var_861_transpose_x_0, transpose_y = var_861_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor("op_861_cast_fp16")]; tensor var_862_to_fp16 = const()[name = tensor("op_862_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_37_cast_fp16 = mul(x = var_861_cast_fp16, y = var_862_to_fp16)[name = tensor("scores_37_cast_fp16")]; tensor scores_39_cast_fp16 = select(a = var_751_to_fp16, b = scores_37_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_39_cast_fp16")]; tensor probs_15_cast_fp16 = softmax(axis = var_756, x = scores_39_cast_fp16)[name = tensor("probs_15_cast_fp16")]; tensor var_871_transpose_x_0 = const()[name = tensor("op_871_transpose_x_0"), val = tensor(false)]; tensor var_871_transpose_y_0 = const()[name = tensor("op_871_transpose_y_0"), val = tensor(false)]; tensor vT_15_cast_fp16 = transpose(perm = var_859, x = v_7_cast_fp16)[name = tensor("transpose_227")]; tensor var_871_cast_fp16 = matmul(transpose_x = var_871_transpose_x_0, transpose_y = var_871_transpose_y_0, x = probs_15_cast_fp16, y = vT_15_cast_fp16)[name = tensor("op_871_cast_fp16")]; tensor var_872 = const()[name = tensor("op_872"), val = tensor([0, 2, 1, 3])]; tensor var_874 = const()[name = tensor("op_874"), val = tensor([1, 111, -1])]; tensor var_873_cast_fp16 = transpose(perm = var_872, x = var_871_cast_fp16)[name = tensor("transpose_226")]; tensor input_63_cast_fp16 = reshape(shape = var_874, x = var_873_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25698752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25797120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_65_cast_fp16 = add(x = input_61_cast_fp16, y = linear_19_cast_fp16)[name = tensor("input_65_cast_fp16")]; tensor x_55_axes_0 = const()[name = tensor("x_55_axes_0"), val = tensor([-1])]; tensor dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25798720)))]; tensor x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("x_55_cast_fp16")]; tensor var_891 = const()[name = tensor("op_891"), val = tensor([0, 2, 1])]; tensor y_21_pad_type_0 = const()[name = tensor("y_21_pad_type_0"), val = tensor("valid")]; tensor y_21_strides_0 = const()[name = tensor("y_21_strides_0"), val = tensor([1])]; tensor y_21_pad_0 = const()[name = tensor("y_21_pad_0"), val = tensor([0, 0])]; tensor y_21_dilations_0 = const()[name = tensor("y_21_dilations_0"), val = tensor([1])]; tensor y_21_groups_0 = const()[name = tensor("y_21_groups_0"), val = tensor(1)]; tensor dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25800320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28159680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_59_cast_fp16 = transpose(perm = var_891, x = x_55_cast_fp16)[name = tensor("transpose_225")]; tensor y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = tensor("y_21_cast_fp16")]; tensor x_61_mode_0 = const()[name = tensor("x_61_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_21_cast_fp16)[name = tensor("x_61_cast_fp16")]; tensor y_23_pad_type_0 = const()[name = tensor("y_23_pad_type_0"), val = tensor("valid")]; tensor y_23_strides_0 = const()[name = tensor("y_23_strides_0"), val = tensor([1])]; tensor y_23_pad_0 = const()[name = tensor("y_23_pad_0"), val = tensor([0, 0])]; tensor y_23_dilations_0 = const()[name = tensor("y_23_dilations_0"), val = tensor([1])]; tensor y_23_groups_0 = const()[name = tensor("y_23_groups_0"), val = tensor(1)]; tensor dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28165888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30525248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = tensor("y_23_cast_fp16")]; tensor var_911 = const()[name = tensor("op_911"), val = tensor([0, 2, 1])]; tensor h_39_cast_fp16 = transpose(perm = var_911, x = y_23_cast_fp16)[name = tensor("transpose_224")]; tensor x_65_cast_fp16 = add(x = input_65_cast_fp16, y = h_39_cast_fp16)[name = tensor("x_65_cast_fp16")]; tensor var_941 = const()[name = tensor("op_941"), val = tensor(-1)]; tensor input_69_axes_0 = const()[name = tensor("input_69_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30526848)))]; tensor var_939_to_fp16 = const()[name = tensor("op_939_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_69_cast_fp16 = layer_norm(axes = input_69_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = x_65_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30528448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32297984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_960 = const()[name = tensor("op_960"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_19_cast_fp16 = reshape(shape = var_960, x = linear_20_cast_fp16)[name = tensor("qkv_19_cast_fp16")]; tensor q_33_begin_0 = const()[name = tensor("q_33_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_33_end_0 = const()[name = tensor("q_33_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_33_end_mask_0 = const()[name = tensor("q_33_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_33_squeeze_mask_0 = const()[name = tensor("q_33_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_33_cast_fp16 = slice_by_index(begin = q_33_begin_0, end = q_33_end_0, end_mask = q_33_end_mask_0, squeeze_mask = q_33_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("q_33_cast_fp16")]; tensor new_k_9_begin_0 = const()[name = tensor("new_k_9_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_9_end_0 = const()[name = tensor("new_k_9_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_9_end_mask_0 = const()[name = tensor("new_k_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_9_squeeze_mask_0 = const()[name = tensor("new_k_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("new_k_9_cast_fp16")]; tensor new_k_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_9_begin_0 = const()[name = tensor("new_v_9_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_9_end_0 = const()[name = tensor("new_v_9_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_9_end_mask_0 = const()[name = tensor("new_v_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_9_squeeze_mask_0 = const()[name = tensor("new_v_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor("new_v_9_cast_fp16")]; tensor new_v_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_975 = const()[name = tensor("op_975"), val = tensor([0, 2, -3, -1])]; tensor var_977_transpose_x_0 = const()[name = tensor("op_977_transpose_x_0"), val = tensor(false)]; tensor var_977_transpose_y_0 = const()[name = tensor("op_977_transpose_y_0"), val = tensor(false)]; tensor transpose_112_perm_0 = const()[name = tensor("transpose_112_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_113_perm_0 = const()[name = tensor("transpose_113_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_113 = transpose(perm = transpose_113_perm_0, x = new_k_9_cast_fp16)[name = tensor("transpose_222")]; tensor transpose_112 = transpose(perm = transpose_112_perm_0, x = q_33_cast_fp16)[name = tensor("transpose_223")]; tensor var_977_cast_fp16 = matmul(transpose_x = var_977_transpose_x_0, transpose_y = var_977_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor("op_977_cast_fp16")]; tensor var_978_to_fp16 = const()[name = tensor("op_978_to_fp16"), val = tensor(0x1p-3)]; tensor scores_41_cast_fp16 = mul(x = var_977_cast_fp16, y = var_978_to_fp16)[name = tensor("scores_41_cast_fp16")]; tensor var_936_to_fp16 = const()[name = tensor("op_936_to_fp16"), val = tensor(-inf)]; tensor scores_43_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_41_cast_fp16)[name = tensor("scores_43_cast_fp16")]; tensor probs_17_cast_fp16 = softmax(axis = var_941, x = scores_43_cast_fp16)[name = tensor("probs_17_cast_fp16")]; tensor var_998_transpose_x_0 = const()[name = tensor("op_998_transpose_x_0"), val = tensor(false)]; tensor var_998_transpose_y_0 = const()[name = tensor("op_998_transpose_y_0"), val = tensor(false)]; tensor vT_17_cast_fp16 = transpose(perm = var_975, x = new_v_9_cast_fp16)[name = tensor("transpose_221")]; tensor var_998_cast_fp16 = matmul(transpose_x = var_998_transpose_x_0, transpose_y = var_998_transpose_y_0, x = probs_17_cast_fp16, y = vT_17_cast_fp16)[name = tensor("op_998_cast_fp16")]; tensor var_999 = const()[name = tensor("op_999"), val = tensor([0, 2, 1, 3])]; tensor var_1003 = const()[name = tensor("op_1003"), val = tensor([1, 111, -1])]; tensor y_25_cast_fp16 = transpose(perm = var_999, x = var_998_cast_fp16)[name = tensor("transpose_220")]; tensor input_71_cast_fp16 = reshape(shape = var_1003, x = y_25_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32302656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32892544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor input_73_cast_fp16 = add(x = x_65_cast_fp16, y = linear_21_cast_fp16)[name = tensor("input_73_cast_fp16")]; tensor x_69_axes_0 = const()[name = tensor("x_69_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32894144)))]; tensor x_69_cast_fp16 = layer_norm(axes = x_69_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("x_69_cast_fp16")]; tensor memory_9_axes_0 = const()[name = tensor("memory_9_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32895744)))]; tensor memory_9_cast_fp16 = layer_norm(axes = memory_9_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_9_cast_fp16")]; tensor dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32897344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32995712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1, 111, 1, 128])]; tensor q_37_cast_fp16 = reshape(shape = var_1025, x = linear_22_cast_fp16)[name = tensor("q_37_cast_fp16")]; tensor dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32996032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33192704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_23_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_9_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor var_1031 = const()[name = tensor("op_1031"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_9_cast_fp16 = reshape(shape = var_1031, x = linear_23_cast_fp16)[name = tensor("kv_9_cast_fp16")]; tensor k_9_begin_0 = const()[name = tensor("k_9_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_9_end_0 = const()[name = tensor("k_9_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_9_end_mask_0 = const()[name = tensor("k_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_9_squeeze_mask_0 = const()[name = tensor("k_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_9_cast_fp16 = slice_by_index(begin = k_9_begin_0, end = k_9_end_0, end_mask = k_9_end_mask_0, squeeze_mask = k_9_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor("k_9_cast_fp16")]; tensor k_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_9_begin_0 = const()[name = tensor("v_9_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_9_end_0 = const()[name = tensor("v_9_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_9_end_mask_0 = const()[name = tensor("v_9_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_9_squeeze_mask_0 = const()[name = tensor("v_9_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_9_cast_fp16 = slice_by_index(begin = v_9_begin_0, end = v_9_end_0, end_mask = v_9_end_mask_0, squeeze_mask = v_9_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor v_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_9_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1044 = const()[name = tensor("op_1044"), val = tensor([0, 2, -3, -1])]; tensor var_1046_transpose_x_0 = const()[name = tensor("op_1046_transpose_x_0"), val = tensor(false)]; tensor var_1046_transpose_y_0 = const()[name = tensor("op_1046_transpose_y_0"), val = tensor(false)]; tensor transpose_114_perm_0 = const()[name = tensor("transpose_114_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_115_perm_0 = const()[name = tensor("transpose_115_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_115 = transpose(perm = transpose_115_perm_0, x = k_9_cast_fp16)[name = tensor("transpose_218")]; tensor transpose_114 = transpose(perm = transpose_114_perm_0, x = q_37_cast_fp16)[name = tensor("transpose_219")]; tensor var_1046_cast_fp16 = matmul(transpose_x = var_1046_transpose_x_0, transpose_y = var_1046_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor("op_1046_cast_fp16")]; tensor var_1047_to_fp16 = const()[name = tensor("op_1047_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_47_cast_fp16 = mul(x = var_1046_cast_fp16, y = var_1047_to_fp16)[name = tensor("scores_47_cast_fp16")]; tensor scores_49_cast_fp16 = select(a = var_936_to_fp16, b = scores_47_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_49_cast_fp16")]; tensor probs_19_cast_fp16 = softmax(axis = var_941, x = scores_49_cast_fp16)[name = tensor("probs_19_cast_fp16")]; tensor var_1056_transpose_x_0 = const()[name = tensor("op_1056_transpose_x_0"), val = tensor(false)]; tensor var_1056_transpose_y_0 = const()[name = tensor("op_1056_transpose_y_0"), val = tensor(false)]; tensor vT_19_cast_fp16 = transpose(perm = var_1044, x = v_9_cast_fp16)[name = tensor("transpose_217")]; tensor var_1056_cast_fp16 = matmul(transpose_x = var_1056_transpose_x_0, transpose_y = var_1056_transpose_y_0, x = probs_19_cast_fp16, y = vT_19_cast_fp16)[name = tensor("op_1056_cast_fp16")]; tensor var_1057 = const()[name = tensor("op_1057"), val = tensor([0, 2, 1, 3])]; tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1, 111, -1])]; tensor var_1058_cast_fp16 = transpose(perm = var_1057, x = var_1056_cast_fp16)[name = tensor("transpose_216")]; tensor input_75_cast_fp16 = reshape(shape = var_1059, x = var_1058_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33193280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33291648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor("linear_24_cast_fp16")]; tensor input_77_cast_fp16 = add(x = input_73_cast_fp16, y = linear_24_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor x_71_axes_0 = const()[name = tensor("x_71_axes_0"), val = tensor([-1])]; tensor dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33293248)))]; tensor x_71_cast_fp16 = layer_norm(axes = x_71_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_77_cast_fp16)[name = tensor("x_71_cast_fp16")]; tensor var_1076 = const()[name = tensor("op_1076"), val = tensor([0, 2, 1])]; tensor y_27_pad_type_0 = const()[name = tensor("y_27_pad_type_0"), val = tensor("valid")]; tensor y_27_strides_0 = const()[name = tensor("y_27_strides_0"), val = tensor([1])]; tensor y_27_pad_0 = const()[name = tensor("y_27_pad_0"), val = tensor([0, 0])]; tensor y_27_dilations_0 = const()[name = tensor("y_27_dilations_0"), val = tensor([1])]; tensor y_27_groups_0 = const()[name = tensor("y_27_groups_0"), val = tensor(1)]; tensor dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33294848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35654208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_75_cast_fp16 = transpose(perm = var_1076, x = x_71_cast_fp16)[name = tensor("transpose_215")]; tensor y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = tensor("y_27_cast_fp16")]; tensor x_77_mode_0 = const()[name = tensor("x_77_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_27_cast_fp16)[name = tensor("x_77_cast_fp16")]; tensor y_29_pad_type_0 = const()[name = tensor("y_29_pad_type_0"), val = tensor("valid")]; tensor y_29_strides_0 = const()[name = tensor("y_29_strides_0"), val = tensor([1])]; tensor y_29_pad_0 = const()[name = tensor("y_29_pad_0"), val = tensor([0, 0])]; tensor y_29_dilations_0 = const()[name = tensor("y_29_dilations_0"), val = tensor([1])]; tensor y_29_groups_0 = const()[name = tensor("y_29_groups_0"), val = tensor(1)]; tensor dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35660416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38019776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = tensor("y_29_cast_fp16")]; tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([0, 2, 1])]; tensor h_49_cast_fp16 = transpose(perm = var_1096, x = y_29_cast_fp16)[name = tensor("transpose_214")]; tensor x_81_cast_fp16 = add(x = input_77_cast_fp16, y = h_49_cast_fp16)[name = tensor("x_81_cast_fp16")]; tensor var_1126 = const()[name = tensor("op_1126"), val = tensor(-1)]; tensor input_81_axes_0 = const()[name = tensor("input_81_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38021376)))]; tensor var_1124_to_fp16 = const()[name = tensor("op_1124_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = x_81_cast_fp16)[name = tensor("input_81_cast_fp16")]; tensor dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38022976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39792512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_25_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor var_1145 = const()[name = tensor("op_1145"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_23_cast_fp16 = reshape(shape = var_1145, x = linear_25_cast_fp16)[name = tensor("qkv_23_cast_fp16")]; tensor q_41_begin_0 = const()[name = tensor("q_41_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_41_end_0 = const()[name = tensor("q_41_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_41_end_mask_0 = const()[name = tensor("q_41_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_41_squeeze_mask_0 = const()[name = tensor("q_41_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_41_cast_fp16 = slice_by_index(begin = q_41_begin_0, end = q_41_end_0, end_mask = q_41_end_mask_0, squeeze_mask = q_41_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("q_41_cast_fp16")]; tensor new_k_11_begin_0 = const()[name = tensor("new_k_11_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_11_end_0 = const()[name = tensor("new_k_11_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_11_end_mask_0 = const()[name = tensor("new_k_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_11_squeeze_mask_0 = const()[name = tensor("new_k_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("new_k_11_cast_fp16")]; tensor new_k_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_11_begin_0 = const()[name = tensor("new_v_11_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_11_end_0 = const()[name = tensor("new_v_11_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_11_end_mask_0 = const()[name = tensor("new_v_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_11_squeeze_mask_0 = const()[name = tensor("new_v_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor("new_v_11_cast_fp16")]; tensor new_v_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1160 = const()[name = tensor("op_1160"), val = tensor([0, 2, -3, -1])]; tensor var_1162_transpose_x_0 = const()[name = tensor("op_1162_transpose_x_0"), val = tensor(false)]; tensor var_1162_transpose_y_0 = const()[name = tensor("op_1162_transpose_y_0"), val = tensor(false)]; tensor transpose_116_perm_0 = const()[name = tensor("transpose_116_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_117_perm_0 = const()[name = tensor("transpose_117_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_117 = transpose(perm = transpose_117_perm_0, x = new_k_11_cast_fp16)[name = tensor("transpose_212")]; tensor transpose_116 = transpose(perm = transpose_116_perm_0, x = q_41_cast_fp16)[name = tensor("transpose_213")]; tensor var_1162_cast_fp16 = matmul(transpose_x = var_1162_transpose_x_0, transpose_y = var_1162_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor("op_1162_cast_fp16")]; tensor var_1163_to_fp16 = const()[name = tensor("op_1163_to_fp16"), val = tensor(0x1p-3)]; tensor scores_51_cast_fp16 = mul(x = var_1162_cast_fp16, y = var_1163_to_fp16)[name = tensor("scores_51_cast_fp16")]; tensor var_1121_to_fp16 = const()[name = tensor("op_1121_to_fp16"), val = tensor(-inf)]; tensor scores_53_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_51_cast_fp16)[name = tensor("scores_53_cast_fp16")]; tensor probs_21_cast_fp16 = softmax(axis = var_1126, x = scores_53_cast_fp16)[name = tensor("probs_21_cast_fp16")]; tensor var_1183_transpose_x_0 = const()[name = tensor("op_1183_transpose_x_0"), val = tensor(false)]; tensor var_1183_transpose_y_0 = const()[name = tensor("op_1183_transpose_y_0"), val = tensor(false)]; tensor vT_21_cast_fp16 = transpose(perm = var_1160, x = new_v_11_cast_fp16)[name = tensor("transpose_211")]; tensor var_1183_cast_fp16 = matmul(transpose_x = var_1183_transpose_x_0, transpose_y = var_1183_transpose_y_0, x = probs_21_cast_fp16, y = vT_21_cast_fp16)[name = tensor("op_1183_cast_fp16")]; tensor var_1184 = const()[name = tensor("op_1184"), val = tensor([0, 2, 1, 3])]; tensor var_1188 = const()[name = tensor("op_1188"), val = tensor([1, 111, -1])]; tensor y_31_cast_fp16 = transpose(perm = var_1184, x = var_1183_cast_fp16)[name = tensor("transpose_210")]; tensor input_83_cast_fp16 = reshape(shape = var_1188, x = y_31_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39797184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40387072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor input_85_cast_fp16 = add(x = x_81_cast_fp16, y = linear_26_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor x_85_axes_0 = const()[name = tensor("x_85_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40388672)))]; tensor x_85_cast_fp16 = layer_norm(axes = x_85_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("x_85_cast_fp16")]; tensor memory_11_axes_0 = const()[name = tensor("memory_11_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40390272)))]; tensor memory_11_cast_fp16 = layer_norm(axes = memory_11_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_11_cast_fp16")]; tensor dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40391872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40490240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor var_1210 = const()[name = tensor("op_1210"), val = tensor([1, 111, 1, 128])]; tensor q_45_cast_fp16 = reshape(shape = var_1210, x = linear_27_cast_fp16)[name = tensor("q_45_cast_fp16")]; tensor dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40490560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40687232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_28_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_11_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor var_1216 = const()[name = tensor("op_1216"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_11_cast_fp16 = reshape(shape = var_1216, x = linear_28_cast_fp16)[name = tensor("kv_11_cast_fp16")]; tensor k_11_begin_0 = const()[name = tensor("k_11_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_11_end_0 = const()[name = tensor("k_11_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_11_end_mask_0 = const()[name = tensor("k_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_11_squeeze_mask_0 = const()[name = tensor("k_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_11_cast_fp16 = slice_by_index(begin = k_11_begin_0, end = k_11_end_0, end_mask = k_11_end_mask_0, squeeze_mask = k_11_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor("k_11_cast_fp16")]; tensor k_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_11_begin_0 = const()[name = tensor("v_11_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_11_end_0 = const()[name = tensor("v_11_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_11_end_mask_0 = const()[name = tensor("v_11_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_11_squeeze_mask_0 = const()[name = tensor("v_11_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_11_cast_fp16 = slice_by_index(begin = v_11_begin_0, end = v_11_end_0, end_mask = v_11_end_mask_0, squeeze_mask = v_11_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; tensor v_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_11_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1229 = const()[name = tensor("op_1229"), val = tensor([0, 2, -3, -1])]; tensor var_1231_transpose_x_0 = const()[name = tensor("op_1231_transpose_x_0"), val = tensor(false)]; tensor var_1231_transpose_y_0 = const()[name = tensor("op_1231_transpose_y_0"), val = tensor(false)]; tensor transpose_118_perm_0 = const()[name = tensor("transpose_118_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_119_perm_0 = const()[name = tensor("transpose_119_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_119 = transpose(perm = transpose_119_perm_0, x = k_11_cast_fp16)[name = tensor("transpose_208")]; tensor transpose_118 = transpose(perm = transpose_118_perm_0, x = q_45_cast_fp16)[name = tensor("transpose_209")]; tensor var_1231_cast_fp16 = matmul(transpose_x = var_1231_transpose_x_0, transpose_y = var_1231_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor("op_1231_cast_fp16")]; tensor var_1232_to_fp16 = const()[name = tensor("op_1232_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_57_cast_fp16 = mul(x = var_1231_cast_fp16, y = var_1232_to_fp16)[name = tensor("scores_57_cast_fp16")]; tensor scores_59_cast_fp16 = select(a = var_1121_to_fp16, b = scores_57_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_59_cast_fp16")]; tensor probs_23_cast_fp16 = softmax(axis = var_1126, x = scores_59_cast_fp16)[name = tensor("probs_23_cast_fp16")]; tensor var_1241_transpose_x_0 = const()[name = tensor("op_1241_transpose_x_0"), val = tensor(false)]; tensor var_1241_transpose_y_0 = const()[name = tensor("op_1241_transpose_y_0"), val = tensor(false)]; tensor vT_23_cast_fp16 = transpose(perm = var_1229, x = v_11_cast_fp16)[name = tensor("transpose_207")]; tensor var_1241_cast_fp16 = matmul(transpose_x = var_1241_transpose_x_0, transpose_y = var_1241_transpose_y_0, x = probs_23_cast_fp16, y = vT_23_cast_fp16)[name = tensor("op_1241_cast_fp16")]; tensor var_1242 = const()[name = tensor("op_1242"), val = tensor([0, 2, 1, 3])]; tensor var_1244 = const()[name = tensor("op_1244"), val = tensor([1, 111, -1])]; tensor var_1243_cast_fp16 = transpose(perm = var_1242, x = var_1241_cast_fp16)[name = tensor("transpose_206")]; tensor input_87_cast_fp16 = reshape(shape = var_1244, x = var_1243_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40687808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40786176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor input_89_cast_fp16 = add(x = input_85_cast_fp16, y = linear_29_cast_fp16)[name = tensor("input_89_cast_fp16")]; tensor x_87_axes_0 = const()[name = tensor("x_87_axes_0"), val = tensor([-1])]; tensor dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40787776)))]; tensor x_87_cast_fp16 = layer_norm(axes = x_87_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("x_87_cast_fp16")]; tensor var_1261 = const()[name = tensor("op_1261"), val = tensor([0, 2, 1])]; tensor y_33_pad_type_0 = const()[name = tensor("y_33_pad_type_0"), val = tensor("valid")]; tensor y_33_strides_0 = const()[name = tensor("y_33_strides_0"), val = tensor([1])]; tensor y_33_pad_0 = const()[name = tensor("y_33_pad_0"), val = tensor([0, 0])]; tensor y_33_dilations_0 = const()[name = tensor("y_33_dilations_0"), val = tensor([1])]; tensor y_33_groups_0 = const()[name = tensor("y_33_groups_0"), val = tensor(1)]; tensor dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40789376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43148736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_91_cast_fp16 = transpose(perm = var_1261, x = x_87_cast_fp16)[name = tensor("transpose_205")]; tensor y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = tensor("y_33_cast_fp16")]; tensor x_93_mode_0 = const()[name = tensor("x_93_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_33_cast_fp16)[name = tensor("x_93_cast_fp16")]; tensor y_35_pad_type_0 = const()[name = tensor("y_35_pad_type_0"), val = tensor("valid")]; tensor y_35_strides_0 = const()[name = tensor("y_35_strides_0"), val = tensor([1])]; tensor y_35_pad_0 = const()[name = tensor("y_35_pad_0"), val = tensor([0, 0])]; tensor y_35_dilations_0 = const()[name = tensor("y_35_dilations_0"), val = tensor([1])]; tensor y_35_groups_0 = const()[name = tensor("y_35_groups_0"), val = tensor(1)]; tensor dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43154944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45514304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = tensor("y_35_cast_fp16")]; tensor var_1281 = const()[name = tensor("op_1281"), val = tensor([0, 2, 1])]; tensor h_59_cast_fp16 = transpose(perm = var_1281, x = y_35_cast_fp16)[name = tensor("transpose_204")]; tensor x_97_cast_fp16 = add(x = input_89_cast_fp16, y = h_59_cast_fp16)[name = tensor("x_97_cast_fp16")]; tensor var_1311 = const()[name = tensor("op_1311"), val = tensor(-1)]; tensor input_93_axes_0 = const()[name = tensor("input_93_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45515904)))]; tensor var_1309_to_fp16 = const()[name = tensor("op_1309_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = x_97_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45517504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47287040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_93_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_1330 = const()[name = tensor("op_1330"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_27_cast_fp16 = reshape(shape = var_1330, x = linear_30_cast_fp16)[name = tensor("qkv_27_cast_fp16")]; tensor q_49_begin_0 = const()[name = tensor("q_49_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_49_end_0 = const()[name = tensor("q_49_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_49_end_mask_0 = const()[name = tensor("q_49_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_49_squeeze_mask_0 = const()[name = tensor("q_49_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_49_cast_fp16 = slice_by_index(begin = q_49_begin_0, end = q_49_end_0, end_mask = q_49_end_mask_0, squeeze_mask = q_49_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("q_49_cast_fp16")]; tensor new_k_13_begin_0 = const()[name = tensor("new_k_13_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_13_end_0 = const()[name = tensor("new_k_13_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_13_end_mask_0 = const()[name = tensor("new_k_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_13_squeeze_mask_0 = const()[name = tensor("new_k_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("new_k_13_cast_fp16")]; tensor new_k_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_13_begin_0 = const()[name = tensor("new_v_13_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_13_end_0 = const()[name = tensor("new_v_13_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_13_end_mask_0 = const()[name = tensor("new_v_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_13_squeeze_mask_0 = const()[name = tensor("new_v_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor("new_v_13_cast_fp16")]; tensor new_v_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1345 = const()[name = tensor("op_1345"), val = tensor([0, 2, -3, -1])]; tensor var_1347_transpose_x_0 = const()[name = tensor("op_1347_transpose_x_0"), val = tensor(false)]; tensor var_1347_transpose_y_0 = const()[name = tensor("op_1347_transpose_y_0"), val = tensor(false)]; tensor transpose_120_perm_0 = const()[name = tensor("transpose_120_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_121_perm_0 = const()[name = tensor("transpose_121_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_121 = transpose(perm = transpose_121_perm_0, x = new_k_13_cast_fp16)[name = tensor("transpose_202")]; tensor transpose_120 = transpose(perm = transpose_120_perm_0, x = q_49_cast_fp16)[name = tensor("transpose_203")]; tensor var_1347_cast_fp16 = matmul(transpose_x = var_1347_transpose_x_0, transpose_y = var_1347_transpose_y_0, x = transpose_120, y = transpose_121)[name = tensor("op_1347_cast_fp16")]; tensor var_1348_to_fp16 = const()[name = tensor("op_1348_to_fp16"), val = tensor(0x1p-3)]; tensor scores_61_cast_fp16 = mul(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor("scores_61_cast_fp16")]; tensor var_1306_to_fp16 = const()[name = tensor("op_1306_to_fp16"), val = tensor(-inf)]; tensor scores_63_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_61_cast_fp16)[name = tensor("scores_63_cast_fp16")]; tensor probs_25_cast_fp16 = softmax(axis = var_1311, x = scores_63_cast_fp16)[name = tensor("probs_25_cast_fp16")]; tensor var_1368_transpose_x_0 = const()[name = tensor("op_1368_transpose_x_0"), val = tensor(false)]; tensor var_1368_transpose_y_0 = const()[name = tensor("op_1368_transpose_y_0"), val = tensor(false)]; tensor vT_25_cast_fp16 = transpose(perm = var_1345, x = new_v_13_cast_fp16)[name = tensor("transpose_201")]; tensor var_1368_cast_fp16 = matmul(transpose_x = var_1368_transpose_x_0, transpose_y = var_1368_transpose_y_0, x = probs_25_cast_fp16, y = vT_25_cast_fp16)[name = tensor("op_1368_cast_fp16")]; tensor var_1369 = const()[name = tensor("op_1369"), val = tensor([0, 2, 1, 3])]; tensor var_1373 = const()[name = tensor("op_1373"), val = tensor([1, 111, -1])]; tensor y_37_cast_fp16 = transpose(perm = var_1369, x = var_1368_cast_fp16)[name = tensor("transpose_200")]; tensor input_95_cast_fp16 = reshape(shape = var_1373, x = y_37_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47291712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47881600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor input_97_cast_fp16 = add(x = x_97_cast_fp16, y = linear_31_cast_fp16)[name = tensor("input_97_cast_fp16")]; tensor x_101_axes_0 = const()[name = tensor("x_101_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47883200)))]; tensor x_101_cast_fp16 = layer_norm(axes = x_101_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_97_cast_fp16)[name = tensor("x_101_cast_fp16")]; tensor memory_13_axes_0 = const()[name = tensor("memory_13_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47884800)))]; tensor memory_13_cast_fp16 = layer_norm(axes = memory_13_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_13_cast_fp16")]; tensor dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47886400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47984768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = x_101_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_1395 = const()[name = tensor("op_1395"), val = tensor([1, 111, 1, 128])]; tensor q_53_cast_fp16 = reshape(shape = var_1395, x = linear_32_cast_fp16)[name = tensor("q_53_cast_fp16")]; tensor dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47985088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48181760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_33_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_13_cast_fp16)[name = tensor("linear_33_cast_fp16")]; tensor var_1401 = const()[name = tensor("op_1401"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_13_cast_fp16 = reshape(shape = var_1401, x = linear_33_cast_fp16)[name = tensor("kv_13_cast_fp16")]; tensor k_13_begin_0 = const()[name = tensor("k_13_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_13_end_0 = const()[name = tensor("k_13_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_13_end_mask_0 = const()[name = tensor("k_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_13_squeeze_mask_0 = const()[name = tensor("k_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_13_cast_fp16 = slice_by_index(begin = k_13_begin_0, end = k_13_end_0, end_mask = k_13_end_mask_0, squeeze_mask = k_13_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor("k_13_cast_fp16")]; tensor k_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_13_begin_0 = const()[name = tensor("v_13_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_13_end_0 = const()[name = tensor("v_13_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_13_end_mask_0 = const()[name = tensor("v_13_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_13_squeeze_mask_0 = const()[name = tensor("v_13_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_13_cast_fp16 = slice_by_index(begin = v_13_begin_0, end = v_13_end_0, end_mask = v_13_end_mask_0, squeeze_mask = v_13_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor v_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_13_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1414 = const()[name = tensor("op_1414"), val = tensor([0, 2, -3, -1])]; tensor var_1416_transpose_x_0 = const()[name = tensor("op_1416_transpose_x_0"), val = tensor(false)]; tensor var_1416_transpose_y_0 = const()[name = tensor("op_1416_transpose_y_0"), val = tensor(false)]; tensor transpose_122_perm_0 = const()[name = tensor("transpose_122_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_123_perm_0 = const()[name = tensor("transpose_123_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_123 = transpose(perm = transpose_123_perm_0, x = k_13_cast_fp16)[name = tensor("transpose_198")]; tensor transpose_122 = transpose(perm = transpose_122_perm_0, x = q_53_cast_fp16)[name = tensor("transpose_199")]; tensor var_1416_cast_fp16 = matmul(transpose_x = var_1416_transpose_x_0, transpose_y = var_1416_transpose_y_0, x = transpose_122, y = transpose_123)[name = tensor("op_1416_cast_fp16")]; tensor var_1417_to_fp16 = const()[name = tensor("op_1417_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_67_cast_fp16 = mul(x = var_1416_cast_fp16, y = var_1417_to_fp16)[name = tensor("scores_67_cast_fp16")]; tensor scores_69_cast_fp16 = select(a = var_1306_to_fp16, b = scores_67_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_69_cast_fp16")]; tensor probs_27_cast_fp16 = softmax(axis = var_1311, x = scores_69_cast_fp16)[name = tensor("probs_27_cast_fp16")]; tensor var_1426_transpose_x_0 = const()[name = tensor("op_1426_transpose_x_0"), val = tensor(false)]; tensor var_1426_transpose_y_0 = const()[name = tensor("op_1426_transpose_y_0"), val = tensor(false)]; tensor vT_27_cast_fp16 = transpose(perm = var_1414, x = v_13_cast_fp16)[name = tensor("transpose_197")]; tensor var_1426_cast_fp16 = matmul(transpose_x = var_1426_transpose_x_0, transpose_y = var_1426_transpose_y_0, x = probs_27_cast_fp16, y = vT_27_cast_fp16)[name = tensor("op_1426_cast_fp16")]; tensor var_1427 = const()[name = tensor("op_1427"), val = tensor([0, 2, 1, 3])]; tensor var_1429 = const()[name = tensor("op_1429"), val = tensor([1, 111, -1])]; tensor var_1428_cast_fp16 = transpose(perm = var_1427, x = var_1426_cast_fp16)[name = tensor("transpose_196")]; tensor input_99_cast_fp16 = reshape(shape = var_1429, x = var_1428_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48182336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48280704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_101_cast_fp16 = add(x = input_97_cast_fp16, y = linear_34_cast_fp16)[name = tensor("input_101_cast_fp16")]; tensor x_103_axes_0 = const()[name = tensor("x_103_axes_0"), val = tensor([-1])]; tensor dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48282304)))]; tensor x_103_cast_fp16 = layer_norm(axes = x_103_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("x_103_cast_fp16")]; tensor var_1446 = const()[name = tensor("op_1446"), val = tensor([0, 2, 1])]; tensor y_39_pad_type_0 = const()[name = tensor("y_39_pad_type_0"), val = tensor("valid")]; tensor y_39_strides_0 = const()[name = tensor("y_39_strides_0"), val = tensor([1])]; tensor y_39_pad_0 = const()[name = tensor("y_39_pad_0"), val = tensor([0, 0])]; tensor y_39_dilations_0 = const()[name = tensor("y_39_dilations_0"), val = tensor([1])]; tensor y_39_groups_0 = const()[name = tensor("y_39_groups_0"), val = tensor(1)]; tensor dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48283904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50643264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_107_cast_fp16 = transpose(perm = var_1446, x = x_103_cast_fp16)[name = tensor("transpose_195")]; tensor y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_107_cast_fp16)[name = tensor("y_39_cast_fp16")]; tensor x_109_mode_0 = const()[name = tensor("x_109_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = y_39_cast_fp16)[name = tensor("x_109_cast_fp16")]; tensor y_41_pad_type_0 = const()[name = tensor("y_41_pad_type_0"), val = tensor("valid")]; tensor y_41_strides_0 = const()[name = tensor("y_41_strides_0"), val = tensor([1])]; tensor y_41_pad_0 = const()[name = tensor("y_41_pad_0"), val = tensor([0, 0])]; tensor y_41_dilations_0 = const()[name = tensor("y_41_dilations_0"), val = tensor([1])]; tensor y_41_groups_0 = const()[name = tensor("y_41_groups_0"), val = tensor(1)]; tensor dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50649472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53008832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_109_cast_fp16)[name = tensor("y_41_cast_fp16")]; tensor var_1466 = const()[name = tensor("op_1466"), val = tensor([0, 2, 1])]; tensor h_69_cast_fp16 = transpose(perm = var_1466, x = y_41_cast_fp16)[name = tensor("transpose_194")]; tensor x_113_cast_fp16 = add(x = input_101_cast_fp16, y = h_69_cast_fp16)[name = tensor("x_113_cast_fp16")]; tensor var_1496 = const()[name = tensor("op_1496"), val = tensor(-1)]; tensor input_105_axes_0 = const()[name = tensor("input_105_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53010432)))]; tensor var_1494_to_fp16 = const()[name = tensor("op_1494_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_105_cast_fp16 = layer_norm(axes = input_105_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = x_113_cast_fp16)[name = tensor("input_105_cast_fp16")]; tensor dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53012032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54781568))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor var_1515 = const()[name = tensor("op_1515"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_31_cast_fp16 = reshape(shape = var_1515, x = linear_35_cast_fp16)[name = tensor("qkv_31_cast_fp16")]; tensor q_57_begin_0 = const()[name = tensor("q_57_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_57_end_0 = const()[name = tensor("q_57_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_57_end_mask_0 = const()[name = tensor("q_57_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_57_squeeze_mask_0 = const()[name = tensor("q_57_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_57_cast_fp16 = slice_by_index(begin = q_57_begin_0, end = q_57_end_0, end_mask = q_57_end_mask_0, squeeze_mask = q_57_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("q_57_cast_fp16")]; tensor new_k_15_begin_0 = const()[name = tensor("new_k_15_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_15_end_0 = const()[name = tensor("new_k_15_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_15_end_mask_0 = const()[name = tensor("new_k_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_15_squeeze_mask_0 = const()[name = tensor("new_k_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("new_k_15_cast_fp16")]; tensor new_k_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_15_begin_0 = const()[name = tensor("new_v_15_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_15_end_0 = const()[name = tensor("new_v_15_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_15_end_mask_0 = const()[name = tensor("new_v_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_15_squeeze_mask_0 = const()[name = tensor("new_v_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor("new_v_15_cast_fp16")]; tensor new_v_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1530 = const()[name = tensor("op_1530"), val = tensor([0, 2, -3, -1])]; tensor var_1532_transpose_x_0 = const()[name = tensor("op_1532_transpose_x_0"), val = tensor(false)]; tensor var_1532_transpose_y_0 = const()[name = tensor("op_1532_transpose_y_0"), val = tensor(false)]; tensor transpose_124_perm_0 = const()[name = tensor("transpose_124_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_125_perm_0 = const()[name = tensor("transpose_125_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_125 = transpose(perm = transpose_125_perm_0, x = new_k_15_cast_fp16)[name = tensor("transpose_192")]; tensor transpose_124 = transpose(perm = transpose_124_perm_0, x = q_57_cast_fp16)[name = tensor("transpose_193")]; tensor var_1532_cast_fp16 = matmul(transpose_x = var_1532_transpose_x_0, transpose_y = var_1532_transpose_y_0, x = transpose_124, y = transpose_125)[name = tensor("op_1532_cast_fp16")]; tensor var_1533_to_fp16 = const()[name = tensor("op_1533_to_fp16"), val = tensor(0x1p-3)]; tensor scores_71_cast_fp16 = mul(x = var_1532_cast_fp16, y = var_1533_to_fp16)[name = tensor("scores_71_cast_fp16")]; tensor var_1491_to_fp16 = const()[name = tensor("op_1491_to_fp16"), val = tensor(-inf)]; tensor scores_73_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_71_cast_fp16)[name = tensor("scores_73_cast_fp16")]; tensor probs_29_cast_fp16 = softmax(axis = var_1496, x = scores_73_cast_fp16)[name = tensor("probs_29_cast_fp16")]; tensor var_1553_transpose_x_0 = const()[name = tensor("op_1553_transpose_x_0"), val = tensor(false)]; tensor var_1553_transpose_y_0 = const()[name = tensor("op_1553_transpose_y_0"), val = tensor(false)]; tensor vT_29_cast_fp16 = transpose(perm = var_1530, x = new_v_15_cast_fp16)[name = tensor("transpose_191")]; tensor var_1553_cast_fp16 = matmul(transpose_x = var_1553_transpose_x_0, transpose_y = var_1553_transpose_y_0, x = probs_29_cast_fp16, y = vT_29_cast_fp16)[name = tensor("op_1553_cast_fp16")]; tensor var_1554 = const()[name = tensor("op_1554"), val = tensor([0, 2, 1, 3])]; tensor var_1558 = const()[name = tensor("op_1558"), val = tensor([1, 111, -1])]; tensor y_43_cast_fp16 = transpose(perm = var_1554, x = var_1553_cast_fp16)[name = tensor("transpose_190")]; tensor input_107_cast_fp16 = reshape(shape = var_1558, x = y_43_cast_fp16)[name = tensor("input_107_cast_fp16")]; tensor dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54786240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55376128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor input_109_cast_fp16 = add(x = x_113_cast_fp16, y = linear_36_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor x_117_axes_0 = const()[name = tensor("x_117_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55377728)))]; tensor x_117_cast_fp16 = layer_norm(axes = x_117_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("x_117_cast_fp16")]; tensor memory_15_axes_0 = const()[name = tensor("memory_15_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55379328)))]; tensor memory_15_cast_fp16 = layer_norm(axes = memory_15_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_15_cast_fp16")]; tensor dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55380928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55479296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_37_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = x_117_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor var_1580 = const()[name = tensor("op_1580"), val = tensor([1, 111, 1, 128])]; tensor q_61_cast_fp16 = reshape(shape = var_1580, x = linear_37_cast_fp16)[name = tensor("q_61_cast_fp16")]; tensor dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55479616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55676288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_38_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_15_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_1586 = const()[name = tensor("op_1586"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_15_cast_fp16 = reshape(shape = var_1586, x = linear_38_cast_fp16)[name = tensor("kv_15_cast_fp16")]; tensor k_15_begin_0 = const()[name = tensor("k_15_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_15_end_0 = const()[name = tensor("k_15_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_15_end_mask_0 = const()[name = tensor("k_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_15_squeeze_mask_0 = const()[name = tensor("k_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_15_cast_fp16 = slice_by_index(begin = k_15_begin_0, end = k_15_end_0, end_mask = k_15_end_mask_0, squeeze_mask = k_15_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor("k_15_cast_fp16")]; tensor k_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_15_begin_0 = const()[name = tensor("v_15_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_15_end_0 = const()[name = tensor("v_15_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_15_end_mask_0 = const()[name = tensor("v_15_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_15_squeeze_mask_0 = const()[name = tensor("v_15_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_15_cast_fp16 = slice_by_index(begin = v_15_begin_0, end = v_15_end_0, end_mask = v_15_end_mask_0, squeeze_mask = v_15_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor("v_15_cast_fp16")]; tensor v_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_15_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1599 = const()[name = tensor("op_1599"), val = tensor([0, 2, -3, -1])]; tensor var_1601_transpose_x_0 = const()[name = tensor("op_1601_transpose_x_0"), val = tensor(false)]; tensor var_1601_transpose_y_0 = const()[name = tensor("op_1601_transpose_y_0"), val = tensor(false)]; tensor transpose_126_perm_0 = const()[name = tensor("transpose_126_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_127_perm_0 = const()[name = tensor("transpose_127_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_127 = transpose(perm = transpose_127_perm_0, x = k_15_cast_fp16)[name = tensor("transpose_188")]; tensor transpose_126 = transpose(perm = transpose_126_perm_0, x = q_61_cast_fp16)[name = tensor("transpose_189")]; tensor var_1601_cast_fp16 = matmul(transpose_x = var_1601_transpose_x_0, transpose_y = var_1601_transpose_y_0, x = transpose_126, y = transpose_127)[name = tensor("op_1601_cast_fp16")]; tensor var_1602_to_fp16 = const()[name = tensor("op_1602_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_77_cast_fp16 = mul(x = var_1601_cast_fp16, y = var_1602_to_fp16)[name = tensor("scores_77_cast_fp16")]; tensor scores_79_cast_fp16 = select(a = var_1491_to_fp16, b = scores_77_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_79_cast_fp16")]; tensor probs_31_cast_fp16 = softmax(axis = var_1496, x = scores_79_cast_fp16)[name = tensor("probs_31_cast_fp16")]; tensor var_1611_transpose_x_0 = const()[name = tensor("op_1611_transpose_x_0"), val = tensor(false)]; tensor var_1611_transpose_y_0 = const()[name = tensor("op_1611_transpose_y_0"), val = tensor(false)]; tensor vT_31_cast_fp16 = transpose(perm = var_1599, x = v_15_cast_fp16)[name = tensor("transpose_187")]; tensor var_1611_cast_fp16 = matmul(transpose_x = var_1611_transpose_x_0, transpose_y = var_1611_transpose_y_0, x = probs_31_cast_fp16, y = vT_31_cast_fp16)[name = tensor("op_1611_cast_fp16")]; tensor var_1612 = const()[name = tensor("op_1612"), val = tensor([0, 2, 1, 3])]; tensor var_1614 = const()[name = tensor("op_1614"), val = tensor([1, 111, -1])]; tensor var_1613_cast_fp16 = transpose(perm = var_1612, x = var_1611_cast_fp16)[name = tensor("transpose_186")]; tensor input_111_cast_fp16 = reshape(shape = var_1614, x = var_1613_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55676864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55775232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor input_113_cast_fp16 = add(x = input_109_cast_fp16, y = linear_39_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor x_119_axes_0 = const()[name = tensor("x_119_axes_0"), val = tensor([-1])]; tensor dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55776832)))]; tensor x_119_cast_fp16 = layer_norm(axes = x_119_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("x_119_cast_fp16")]; tensor var_1631 = const()[name = tensor("op_1631"), val = tensor([0, 2, 1])]; tensor y_45_pad_type_0 = const()[name = tensor("y_45_pad_type_0"), val = tensor("valid")]; tensor y_45_strides_0 = const()[name = tensor("y_45_strides_0"), val = tensor([1])]; tensor y_45_pad_0 = const()[name = tensor("y_45_pad_0"), val = tensor([0, 0])]; tensor y_45_dilations_0 = const()[name = tensor("y_45_dilations_0"), val = tensor([1])]; tensor y_45_groups_0 = const()[name = tensor("y_45_groups_0"), val = tensor(1)]; tensor dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55778432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58137792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_123_cast_fp16 = transpose(perm = var_1631, x = x_119_cast_fp16)[name = tensor("transpose_185")]; tensor y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_123_cast_fp16)[name = tensor("y_45_cast_fp16")]; tensor x_125_mode_0 = const()[name = tensor("x_125_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = y_45_cast_fp16)[name = tensor("x_125_cast_fp16")]; tensor y_47_pad_type_0 = const()[name = tensor("y_47_pad_type_0"), val = tensor("valid")]; tensor y_47_strides_0 = const()[name = tensor("y_47_strides_0"), val = tensor([1])]; tensor y_47_pad_0 = const()[name = tensor("y_47_pad_0"), val = tensor([0, 0])]; tensor y_47_dilations_0 = const()[name = tensor("y_47_dilations_0"), val = tensor([1])]; tensor y_47_groups_0 = const()[name = tensor("y_47_groups_0"), val = tensor(1)]; tensor dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58144000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60503360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_47_cast_fp16 = conv(dilations = y_47_dilations_0, groups = y_47_groups_0, pad = y_47_pad_0, pad_type = y_47_pad_type_0, strides = y_47_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_125_cast_fp16)[name = tensor("y_47_cast_fp16")]; tensor var_1651 = const()[name = tensor("op_1651"), val = tensor([0, 2, 1])]; tensor h_79_cast_fp16 = transpose(perm = var_1651, x = y_47_cast_fp16)[name = tensor("transpose_184")]; tensor x_129_cast_fp16 = add(x = input_113_cast_fp16, y = h_79_cast_fp16)[name = tensor("x_129_cast_fp16")]; tensor var_1681 = const()[name = tensor("op_1681"), val = tensor(-1)]; tensor input_117_axes_0 = const()[name = tensor("input_117_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60504960)))]; tensor var_1679_to_fp16 = const()[name = tensor("op_1679_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = x_129_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60506560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62276096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_1700 = const()[name = tensor("op_1700"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_35_cast_fp16 = reshape(shape = var_1700, x = linear_40_cast_fp16)[name = tensor("qkv_35_cast_fp16")]; tensor q_65_begin_0 = const()[name = tensor("q_65_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_65_end_0 = const()[name = tensor("q_65_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_65_end_mask_0 = const()[name = tensor("q_65_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_65_squeeze_mask_0 = const()[name = tensor("q_65_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_65_cast_fp16 = slice_by_index(begin = q_65_begin_0, end = q_65_end_0, end_mask = q_65_end_mask_0, squeeze_mask = q_65_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("q_65_cast_fp16")]; tensor new_k_17_begin_0 = const()[name = tensor("new_k_17_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_17_end_0 = const()[name = tensor("new_k_17_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_17_end_mask_0 = const()[name = tensor("new_k_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_17_squeeze_mask_0 = const()[name = tensor("new_k_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("new_k_17_cast_fp16")]; tensor new_k_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_17_begin_0 = const()[name = tensor("new_v_17_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_17_end_0 = const()[name = tensor("new_v_17_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_17_end_mask_0 = const()[name = tensor("new_v_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_17_squeeze_mask_0 = const()[name = tensor("new_v_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor("new_v_17_cast_fp16")]; tensor new_v_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1715 = const()[name = tensor("op_1715"), val = tensor([0, 2, -3, -1])]; tensor var_1717_transpose_x_0 = const()[name = tensor("op_1717_transpose_x_0"), val = tensor(false)]; tensor var_1717_transpose_y_0 = const()[name = tensor("op_1717_transpose_y_0"), val = tensor(false)]; tensor transpose_128_perm_0 = const()[name = tensor("transpose_128_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_129_perm_0 = const()[name = tensor("transpose_129_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_129 = transpose(perm = transpose_129_perm_0, x = new_k_17_cast_fp16)[name = tensor("transpose_182")]; tensor transpose_128 = transpose(perm = transpose_128_perm_0, x = q_65_cast_fp16)[name = tensor("transpose_183")]; tensor var_1717_cast_fp16 = matmul(transpose_x = var_1717_transpose_x_0, transpose_y = var_1717_transpose_y_0, x = transpose_128, y = transpose_129)[name = tensor("op_1717_cast_fp16")]; tensor var_1718_to_fp16 = const()[name = tensor("op_1718_to_fp16"), val = tensor(0x1p-3)]; tensor scores_81_cast_fp16 = mul(x = var_1717_cast_fp16, y = var_1718_to_fp16)[name = tensor("scores_81_cast_fp16")]; tensor var_1676_to_fp16 = const()[name = tensor("op_1676_to_fp16"), val = tensor(-inf)]; tensor scores_83_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_81_cast_fp16)[name = tensor("scores_83_cast_fp16")]; tensor probs_33_cast_fp16 = softmax(axis = var_1681, x = scores_83_cast_fp16)[name = tensor("probs_33_cast_fp16")]; tensor var_1738_transpose_x_0 = const()[name = tensor("op_1738_transpose_x_0"), val = tensor(false)]; tensor var_1738_transpose_y_0 = const()[name = tensor("op_1738_transpose_y_0"), val = tensor(false)]; tensor vT_33_cast_fp16 = transpose(perm = var_1715, x = new_v_17_cast_fp16)[name = tensor("transpose_181")]; tensor var_1738_cast_fp16 = matmul(transpose_x = var_1738_transpose_x_0, transpose_y = var_1738_transpose_y_0, x = probs_33_cast_fp16, y = vT_33_cast_fp16)[name = tensor("op_1738_cast_fp16")]; tensor var_1739 = const()[name = tensor("op_1739"), val = tensor([0, 2, 1, 3])]; tensor var_1743 = const()[name = tensor("op_1743"), val = tensor([1, 111, -1])]; tensor y_49_cast_fp16 = transpose(perm = var_1739, x = var_1738_cast_fp16)[name = tensor("transpose_180")]; tensor input_119_cast_fp16 = reshape(shape = var_1743, x = y_49_cast_fp16)[name = tensor("input_119_cast_fp16")]; tensor dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62280768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62870656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor input_121_cast_fp16 = add(x = x_129_cast_fp16, y = linear_41_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor x_133_axes_0 = const()[name = tensor("x_133_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62872256)))]; tensor x_133_cast_fp16 = layer_norm(axes = x_133_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("x_133_cast_fp16")]; tensor memory_17_axes_0 = const()[name = tensor("memory_17_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62873856)))]; tensor memory_17_cast_fp16 = layer_norm(axes = memory_17_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_17_cast_fp16")]; tensor dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62875456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62973824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = x_133_cast_fp16)[name = tensor("linear_42_cast_fp16")]; tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 111, 1, 128])]; tensor q_69_cast_fp16 = reshape(shape = var_1765, x = linear_42_cast_fp16)[name = tensor("q_69_cast_fp16")]; tensor dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62974144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63170816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_43_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_17_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor var_1771 = const()[name = tensor("op_1771"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_17_cast_fp16 = reshape(shape = var_1771, x = linear_43_cast_fp16)[name = tensor("kv_17_cast_fp16")]; tensor k_17_begin_0 = const()[name = tensor("k_17_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_17_end_0 = const()[name = tensor("k_17_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_17_end_mask_0 = const()[name = tensor("k_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_17_squeeze_mask_0 = const()[name = tensor("k_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_17_cast_fp16 = slice_by_index(begin = k_17_begin_0, end = k_17_end_0, end_mask = k_17_end_mask_0, squeeze_mask = k_17_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor("k_17_cast_fp16")]; tensor k_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_17_begin_0 = const()[name = tensor("v_17_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_17_end_0 = const()[name = tensor("v_17_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_17_end_mask_0 = const()[name = tensor("v_17_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_17_squeeze_mask_0 = const()[name = tensor("v_17_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_17_cast_fp16 = slice_by_index(begin = v_17_begin_0, end = v_17_end_0, end_mask = v_17_end_mask_0, squeeze_mask = v_17_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor v_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_17_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1784 = const()[name = tensor("op_1784"), val = tensor([0, 2, -3, -1])]; tensor var_1786_transpose_x_0 = const()[name = tensor("op_1786_transpose_x_0"), val = tensor(false)]; tensor var_1786_transpose_y_0 = const()[name = tensor("op_1786_transpose_y_0"), val = tensor(false)]; tensor transpose_130_perm_0 = const()[name = tensor("transpose_130_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_131_perm_0 = const()[name = tensor("transpose_131_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_131 = transpose(perm = transpose_131_perm_0, x = k_17_cast_fp16)[name = tensor("transpose_178")]; tensor transpose_130 = transpose(perm = transpose_130_perm_0, x = q_69_cast_fp16)[name = tensor("transpose_179")]; tensor var_1786_cast_fp16 = matmul(transpose_x = var_1786_transpose_x_0, transpose_y = var_1786_transpose_y_0, x = transpose_130, y = transpose_131)[name = tensor("op_1786_cast_fp16")]; tensor var_1787_to_fp16 = const()[name = tensor("op_1787_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_87_cast_fp16 = mul(x = var_1786_cast_fp16, y = var_1787_to_fp16)[name = tensor("scores_87_cast_fp16")]; tensor scores_89_cast_fp16 = select(a = var_1676_to_fp16, b = scores_87_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_89_cast_fp16")]; tensor probs_35_cast_fp16 = softmax(axis = var_1681, x = scores_89_cast_fp16)[name = tensor("probs_35_cast_fp16")]; tensor var_1796_transpose_x_0 = const()[name = tensor("op_1796_transpose_x_0"), val = tensor(false)]; tensor var_1796_transpose_y_0 = const()[name = tensor("op_1796_transpose_y_0"), val = tensor(false)]; tensor vT_35_cast_fp16 = transpose(perm = var_1784, x = v_17_cast_fp16)[name = tensor("transpose_177")]; tensor var_1796_cast_fp16 = matmul(transpose_x = var_1796_transpose_x_0, transpose_y = var_1796_transpose_y_0, x = probs_35_cast_fp16, y = vT_35_cast_fp16)[name = tensor("op_1796_cast_fp16")]; tensor var_1797 = const()[name = tensor("op_1797"), val = tensor([0, 2, 1, 3])]; tensor var_1799 = const()[name = tensor("op_1799"), val = tensor([1, 111, -1])]; tensor var_1798_cast_fp16 = transpose(perm = var_1797, x = var_1796_cast_fp16)[name = tensor("transpose_176")]; tensor input_123_cast_fp16 = reshape(shape = var_1799, x = var_1798_cast_fp16)[name = tensor("input_123_cast_fp16")]; tensor dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63171392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63269760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor input_125_cast_fp16 = add(x = input_121_cast_fp16, y = linear_44_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor x_135_axes_0 = const()[name = tensor("x_135_axes_0"), val = tensor([-1])]; tensor dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63271360)))]; tensor x_135_cast_fp16 = layer_norm(axes = x_135_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_125_cast_fp16)[name = tensor("x_135_cast_fp16")]; tensor var_1816 = const()[name = tensor("op_1816"), val = tensor([0, 2, 1])]; tensor y_51_pad_type_0 = const()[name = tensor("y_51_pad_type_0"), val = tensor("valid")]; tensor y_51_strides_0 = const()[name = tensor("y_51_strides_0"), val = tensor([1])]; tensor y_51_pad_0 = const()[name = tensor("y_51_pad_0"), val = tensor([0, 0])]; tensor y_51_dilations_0 = const()[name = tensor("y_51_dilations_0"), val = tensor([1])]; tensor y_51_groups_0 = const()[name = tensor("y_51_groups_0"), val = tensor(1)]; tensor dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63272960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65632320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_139_cast_fp16 = transpose(perm = var_1816, x = x_135_cast_fp16)[name = tensor("transpose_175")]; tensor y_51_cast_fp16 = conv(dilations = y_51_dilations_0, groups = y_51_groups_0, pad = y_51_pad_0, pad_type = y_51_pad_type_0, strides = y_51_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_139_cast_fp16)[name = tensor("y_51_cast_fp16")]; tensor x_141_mode_0 = const()[name = tensor("x_141_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_141_cast_fp16 = gelu(mode = x_141_mode_0, x = y_51_cast_fp16)[name = tensor("x_141_cast_fp16")]; tensor y_53_pad_type_0 = const()[name = tensor("y_53_pad_type_0"), val = tensor("valid")]; tensor y_53_strides_0 = const()[name = tensor("y_53_strides_0"), val = tensor([1])]; tensor y_53_pad_0 = const()[name = tensor("y_53_pad_0"), val = tensor([0, 0])]; tensor y_53_dilations_0 = const()[name = tensor("y_53_dilations_0"), val = tensor([1])]; tensor y_53_groups_0 = const()[name = tensor("y_53_groups_0"), val = tensor(1)]; tensor dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65638528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67997888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_53_cast_fp16 = conv(dilations = y_53_dilations_0, groups = y_53_groups_0, pad = y_53_pad_0, pad_type = y_53_pad_type_0, strides = y_53_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_141_cast_fp16)[name = tensor("y_53_cast_fp16")]; tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([0, 2, 1])]; tensor h_89_cast_fp16 = transpose(perm = var_1836, x = y_53_cast_fp16)[name = tensor("transpose_174")]; tensor x_145_cast_fp16 = add(x = input_125_cast_fp16, y = h_89_cast_fp16)[name = tensor("x_145_cast_fp16")]; tensor var_1866 = const()[name = tensor("op_1866"), val = tensor(-1)]; tensor input_129_axes_0 = const()[name = tensor("input_129_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67999488)))]; tensor var_1864_to_fp16 = const()[name = tensor("op_1864_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_129_cast_fp16 = layer_norm(axes = input_129_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = x_145_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68001088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69770624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor var_1885 = const()[name = tensor("op_1885"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_39_cast_fp16 = reshape(shape = var_1885, x = linear_45_cast_fp16)[name = tensor("qkv_39_cast_fp16")]; tensor q_73_begin_0 = const()[name = tensor("q_73_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_73_end_0 = const()[name = tensor("q_73_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_73_end_mask_0 = const()[name = tensor("q_73_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_73_squeeze_mask_0 = const()[name = tensor("q_73_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_73_cast_fp16 = slice_by_index(begin = q_73_begin_0, end = q_73_end_0, end_mask = q_73_end_mask_0, squeeze_mask = q_73_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("q_73_cast_fp16")]; tensor new_k_19_begin_0 = const()[name = tensor("new_k_19_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_19_end_0 = const()[name = tensor("new_k_19_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_19_end_mask_0 = const()[name = tensor("new_k_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_19_squeeze_mask_0 = const()[name = tensor("new_k_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("new_k_19_cast_fp16")]; tensor new_k_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_19_begin_0 = const()[name = tensor("new_v_19_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_19_end_0 = const()[name = tensor("new_v_19_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_19_end_mask_0 = const()[name = tensor("new_v_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_19_squeeze_mask_0 = const()[name = tensor("new_v_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor("new_v_19_cast_fp16")]; tensor new_v_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1900 = const()[name = tensor("op_1900"), val = tensor([0, 2, -3, -1])]; tensor var_1902_transpose_x_0 = const()[name = tensor("op_1902_transpose_x_0"), val = tensor(false)]; tensor var_1902_transpose_y_0 = const()[name = tensor("op_1902_transpose_y_0"), val = tensor(false)]; tensor transpose_132_perm_0 = const()[name = tensor("transpose_132_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_133_perm_0 = const()[name = tensor("transpose_133_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_133 = transpose(perm = transpose_133_perm_0, x = new_k_19_cast_fp16)[name = tensor("transpose_172")]; tensor transpose_132 = transpose(perm = transpose_132_perm_0, x = q_73_cast_fp16)[name = tensor("transpose_173")]; tensor var_1902_cast_fp16 = matmul(transpose_x = var_1902_transpose_x_0, transpose_y = var_1902_transpose_y_0, x = transpose_132, y = transpose_133)[name = tensor("op_1902_cast_fp16")]; tensor var_1903_to_fp16 = const()[name = tensor("op_1903_to_fp16"), val = tensor(0x1p-3)]; tensor scores_91_cast_fp16 = mul(x = var_1902_cast_fp16, y = var_1903_to_fp16)[name = tensor("scores_91_cast_fp16")]; tensor var_1861_to_fp16 = const()[name = tensor("op_1861_to_fp16"), val = tensor(-inf)]; tensor scores_93_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_91_cast_fp16)[name = tensor("scores_93_cast_fp16")]; tensor probs_37_cast_fp16 = softmax(axis = var_1866, x = scores_93_cast_fp16)[name = tensor("probs_37_cast_fp16")]; tensor var_1923_transpose_x_0 = const()[name = tensor("op_1923_transpose_x_0"), val = tensor(false)]; tensor var_1923_transpose_y_0 = const()[name = tensor("op_1923_transpose_y_0"), val = tensor(false)]; tensor vT_37_cast_fp16 = transpose(perm = var_1900, x = new_v_19_cast_fp16)[name = tensor("transpose_171")]; tensor var_1923_cast_fp16 = matmul(transpose_x = var_1923_transpose_x_0, transpose_y = var_1923_transpose_y_0, x = probs_37_cast_fp16, y = vT_37_cast_fp16)[name = tensor("op_1923_cast_fp16")]; tensor var_1924 = const()[name = tensor("op_1924"), val = tensor([0, 2, 1, 3])]; tensor var_1928 = const()[name = tensor("op_1928"), val = tensor([1, 111, -1])]; tensor y_55_cast_fp16 = transpose(perm = var_1924, x = var_1923_cast_fp16)[name = tensor("transpose_170")]; tensor input_131_cast_fp16 = reshape(shape = var_1928, x = y_55_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69775296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70365184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor input_133_cast_fp16 = add(x = x_145_cast_fp16, y = linear_46_cast_fp16)[name = tensor("input_133_cast_fp16")]; tensor x_149_axes_0 = const()[name = tensor("x_149_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70366784)))]; tensor x_149_cast_fp16 = layer_norm(axes = x_149_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_133_cast_fp16)[name = tensor("x_149_cast_fp16")]; tensor memory_19_axes_0 = const()[name = tensor("memory_19_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70368384)))]; tensor memory_19_cast_fp16 = layer_norm(axes = memory_19_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_19_cast_fp16")]; tensor dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70369984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70468352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = x_149_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor var_1950 = const()[name = tensor("op_1950"), val = tensor([1, 111, 1, 128])]; tensor q_77_cast_fp16 = reshape(shape = var_1950, x = linear_47_cast_fp16)[name = tensor("q_77_cast_fp16")]; tensor dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70468672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70665344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_48_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_19_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_1956 = const()[name = tensor("op_1956"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_19_cast_fp16 = reshape(shape = var_1956, x = linear_48_cast_fp16)[name = tensor("kv_19_cast_fp16")]; tensor k_19_begin_0 = const()[name = tensor("k_19_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_19_end_0 = const()[name = tensor("k_19_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_19_end_mask_0 = const()[name = tensor("k_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_19_squeeze_mask_0 = const()[name = tensor("k_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_19_cast_fp16 = slice_by_index(begin = k_19_begin_0, end = k_19_end_0, end_mask = k_19_end_mask_0, squeeze_mask = k_19_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor("k_19_cast_fp16")]; tensor k_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_19_begin_0 = const()[name = tensor("v_19_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_19_end_0 = const()[name = tensor("v_19_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_19_end_mask_0 = const()[name = tensor("v_19_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_19_squeeze_mask_0 = const()[name = tensor("v_19_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_19_cast_fp16 = slice_by_index(begin = v_19_begin_0, end = v_19_end_0, end_mask = v_19_end_mask_0, squeeze_mask = v_19_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor("v_19_cast_fp16")]; tensor v_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_19_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_1969 = const()[name = tensor("op_1969"), val = tensor([0, 2, -3, -1])]; tensor var_1971_transpose_x_0 = const()[name = tensor("op_1971_transpose_x_0"), val = tensor(false)]; tensor var_1971_transpose_y_0 = const()[name = tensor("op_1971_transpose_y_0"), val = tensor(false)]; tensor transpose_134_perm_0 = const()[name = tensor("transpose_134_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_135_perm_0 = const()[name = tensor("transpose_135_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_135 = transpose(perm = transpose_135_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_168")]; tensor transpose_134 = transpose(perm = transpose_134_perm_0, x = q_77_cast_fp16)[name = tensor("transpose_169")]; tensor var_1971_cast_fp16 = matmul(transpose_x = var_1971_transpose_x_0, transpose_y = var_1971_transpose_y_0, x = transpose_134, y = transpose_135)[name = tensor("op_1971_cast_fp16")]; tensor var_1972_to_fp16 = const()[name = tensor("op_1972_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_97_cast_fp16 = mul(x = var_1971_cast_fp16, y = var_1972_to_fp16)[name = tensor("scores_97_cast_fp16")]; tensor scores_99_cast_fp16 = select(a = var_1861_to_fp16, b = scores_97_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_99_cast_fp16")]; tensor probs_39_cast_fp16 = softmax(axis = var_1866, x = scores_99_cast_fp16)[name = tensor("probs_39_cast_fp16")]; tensor var_1981_transpose_x_0 = const()[name = tensor("op_1981_transpose_x_0"), val = tensor(false)]; tensor var_1981_transpose_y_0 = const()[name = tensor("op_1981_transpose_y_0"), val = tensor(false)]; tensor vT_39_cast_fp16 = transpose(perm = var_1969, x = v_19_cast_fp16)[name = tensor("transpose_167")]; tensor var_1981_cast_fp16 = matmul(transpose_x = var_1981_transpose_x_0, transpose_y = var_1981_transpose_y_0, x = probs_39_cast_fp16, y = vT_39_cast_fp16)[name = tensor("op_1981_cast_fp16")]; tensor var_1982 = const()[name = tensor("op_1982"), val = tensor([0, 2, 1, 3])]; tensor var_1984 = const()[name = tensor("op_1984"), val = tensor([1, 111, -1])]; tensor var_1983_cast_fp16 = transpose(perm = var_1982, x = var_1981_cast_fp16)[name = tensor("transpose_166")]; tensor input_135_cast_fp16 = reshape(shape = var_1984, x = var_1983_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70665920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70764288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor input_137_cast_fp16 = add(x = input_133_cast_fp16, y = linear_49_cast_fp16)[name = tensor("input_137_cast_fp16")]; tensor x_151_axes_0 = const()[name = tensor("x_151_axes_0"), val = tensor([-1])]; tensor dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70765888)))]; tensor x_151_cast_fp16 = layer_norm(axes = x_151_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("x_151_cast_fp16")]; tensor var_2001 = const()[name = tensor("op_2001"), val = tensor([0, 2, 1])]; tensor y_57_pad_type_0 = const()[name = tensor("y_57_pad_type_0"), val = tensor("valid")]; tensor y_57_strides_0 = const()[name = tensor("y_57_strides_0"), val = tensor([1])]; tensor y_57_pad_0 = const()[name = tensor("y_57_pad_0"), val = tensor([0, 0])]; tensor y_57_dilations_0 = const()[name = tensor("y_57_dilations_0"), val = tensor([1])]; tensor y_57_groups_0 = const()[name = tensor("y_57_groups_0"), val = tensor(1)]; tensor dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70767488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73126848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_155_cast_fp16 = transpose(perm = var_2001, x = x_151_cast_fp16)[name = tensor("transpose_165")]; tensor y_57_cast_fp16 = conv(dilations = y_57_dilations_0, groups = y_57_groups_0, pad = y_57_pad_0, pad_type = y_57_pad_type_0, strides = y_57_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_155_cast_fp16)[name = tensor("y_57_cast_fp16")]; tensor x_157_mode_0 = const()[name = tensor("x_157_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_157_cast_fp16 = gelu(mode = x_157_mode_0, x = y_57_cast_fp16)[name = tensor("x_157_cast_fp16")]; tensor y_59_pad_type_0 = const()[name = tensor("y_59_pad_type_0"), val = tensor("valid")]; tensor y_59_strides_0 = const()[name = tensor("y_59_strides_0"), val = tensor([1])]; tensor y_59_pad_0 = const()[name = tensor("y_59_pad_0"), val = tensor([0, 0])]; tensor y_59_dilations_0 = const()[name = tensor("y_59_dilations_0"), val = tensor([1])]; tensor y_59_groups_0 = const()[name = tensor("y_59_groups_0"), val = tensor(1)]; tensor dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73133056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75492416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_59_cast_fp16 = conv(dilations = y_59_dilations_0, groups = y_59_groups_0, pad = y_59_pad_0, pad_type = y_59_pad_type_0, strides = y_59_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_157_cast_fp16)[name = tensor("y_59_cast_fp16")]; tensor var_2021 = const()[name = tensor("op_2021"), val = tensor([0, 2, 1])]; tensor h_99_cast_fp16 = transpose(perm = var_2021, x = y_59_cast_fp16)[name = tensor("transpose_164")]; tensor x_161_cast_fp16 = add(x = input_137_cast_fp16, y = h_99_cast_fp16)[name = tensor("x_161_cast_fp16")]; tensor var_2051 = const()[name = tensor("op_2051"), val = tensor(-1)]; tensor input_141_axes_0 = const()[name = tensor("input_141_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75494016)))]; tensor var_2049_to_fp16 = const()[name = tensor("op_2049_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_141_cast_fp16 = layer_norm(axes = input_141_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = x_161_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75495616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77265152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_141_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_2070 = const()[name = tensor("op_2070"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_43_cast_fp16 = reshape(shape = var_2070, x = linear_50_cast_fp16)[name = tensor("qkv_43_cast_fp16")]; tensor q_81_begin_0 = const()[name = tensor("q_81_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_81_end_0 = const()[name = tensor("q_81_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_81_end_mask_0 = const()[name = tensor("q_81_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_81_squeeze_mask_0 = const()[name = tensor("q_81_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_81_cast_fp16 = slice_by_index(begin = q_81_begin_0, end = q_81_end_0, end_mask = q_81_end_mask_0, squeeze_mask = q_81_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("q_81_cast_fp16")]; tensor new_k_21_begin_0 = const()[name = tensor("new_k_21_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_21_end_0 = const()[name = tensor("new_k_21_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_21_end_mask_0 = const()[name = tensor("new_k_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_21_squeeze_mask_0 = const()[name = tensor("new_k_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("new_k_21_cast_fp16")]; tensor new_k_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_21_begin_0 = const()[name = tensor("new_v_21_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_21_end_0 = const()[name = tensor("new_v_21_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_21_end_mask_0 = const()[name = tensor("new_v_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_21_squeeze_mask_0 = const()[name = tensor("new_v_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor("new_v_21_cast_fp16")]; tensor new_v_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2085 = const()[name = tensor("op_2085"), val = tensor([0, 2, -3, -1])]; tensor var_2087_transpose_x_0 = const()[name = tensor("op_2087_transpose_x_0"), val = tensor(false)]; tensor var_2087_transpose_y_0 = const()[name = tensor("op_2087_transpose_y_0"), val = tensor(false)]; tensor transpose_136_perm_0 = const()[name = tensor("transpose_136_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_137_perm_0 = const()[name = tensor("transpose_137_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_137 = transpose(perm = transpose_137_perm_0, x = new_k_21_cast_fp16)[name = tensor("transpose_162")]; tensor transpose_136 = transpose(perm = transpose_136_perm_0, x = q_81_cast_fp16)[name = tensor("transpose_163")]; tensor var_2087_cast_fp16 = matmul(transpose_x = var_2087_transpose_x_0, transpose_y = var_2087_transpose_y_0, x = transpose_136, y = transpose_137)[name = tensor("op_2087_cast_fp16")]; tensor var_2088_to_fp16 = const()[name = tensor("op_2088_to_fp16"), val = tensor(0x1p-3)]; tensor scores_101_cast_fp16 = mul(x = var_2087_cast_fp16, y = var_2088_to_fp16)[name = tensor("scores_101_cast_fp16")]; tensor var_2046_to_fp16 = const()[name = tensor("op_2046_to_fp16"), val = tensor(-inf)]; tensor scores_103_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_101_cast_fp16)[name = tensor("scores_103_cast_fp16")]; tensor probs_41_cast_fp16 = softmax(axis = var_2051, x = scores_103_cast_fp16)[name = tensor("probs_41_cast_fp16")]; tensor var_2108_transpose_x_0 = const()[name = tensor("op_2108_transpose_x_0"), val = tensor(false)]; tensor var_2108_transpose_y_0 = const()[name = tensor("op_2108_transpose_y_0"), val = tensor(false)]; tensor vT_41_cast_fp16 = transpose(perm = var_2085, x = new_v_21_cast_fp16)[name = tensor("transpose_161")]; tensor var_2108_cast_fp16 = matmul(transpose_x = var_2108_transpose_x_0, transpose_y = var_2108_transpose_y_0, x = probs_41_cast_fp16, y = vT_41_cast_fp16)[name = tensor("op_2108_cast_fp16")]; tensor var_2109 = const()[name = tensor("op_2109"), val = tensor([0, 2, 1, 3])]; tensor var_2113 = const()[name = tensor("op_2113"), val = tensor([1, 111, -1])]; tensor y_61_cast_fp16 = transpose(perm = var_2109, x = var_2108_cast_fp16)[name = tensor("transpose_160")]; tensor input_143_cast_fp16 = reshape(shape = var_2113, x = y_61_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77269824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77859712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = tensor("linear_51_cast_fp16")]; tensor input_145_cast_fp16 = add(x = x_161_cast_fp16, y = linear_51_cast_fp16)[name = tensor("input_145_cast_fp16")]; tensor x_165_axes_0 = const()[name = tensor("x_165_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77861312)))]; tensor x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_145_cast_fp16)[name = tensor("x_165_cast_fp16")]; tensor memory_21_axes_0 = const()[name = tensor("memory_21_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77862912)))]; tensor memory_21_cast_fp16 = layer_norm(axes = memory_21_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_21_cast_fp16")]; tensor dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77864512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77962880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = x_165_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor var_2135 = const()[name = tensor("op_2135"), val = tensor([1, 111, 1, 128])]; tensor q_85_cast_fp16 = reshape(shape = var_2135, x = linear_52_cast_fp16)[name = tensor("q_85_cast_fp16")]; tensor dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77963200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78159872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_53_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_21_cast_fp16)[name = tensor("linear_53_cast_fp16")]; tensor var_2141 = const()[name = tensor("op_2141"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_21_cast_fp16 = reshape(shape = var_2141, x = linear_53_cast_fp16)[name = tensor("kv_21_cast_fp16")]; tensor k_21_begin_0 = const()[name = tensor("k_21_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_21_end_0 = const()[name = tensor("k_21_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_21_end_mask_0 = const()[name = tensor("k_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_21_squeeze_mask_0 = const()[name = tensor("k_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_21_cast_fp16 = slice_by_index(begin = k_21_begin_0, end = k_21_end_0, end_mask = k_21_end_mask_0, squeeze_mask = k_21_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor("k_21_cast_fp16")]; tensor k_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_21_begin_0 = const()[name = tensor("v_21_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_21_end_0 = const()[name = tensor("v_21_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_21_end_mask_0 = const()[name = tensor("v_21_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_21_squeeze_mask_0 = const()[name = tensor("v_21_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_21_cast_fp16 = slice_by_index(begin = v_21_begin_0, end = v_21_end_0, end_mask = v_21_end_mask_0, squeeze_mask = v_21_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor v_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_21_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2154 = const()[name = tensor("op_2154"), val = tensor([0, 2, -3, -1])]; tensor var_2156_transpose_x_0 = const()[name = tensor("op_2156_transpose_x_0"), val = tensor(false)]; tensor var_2156_transpose_y_0 = const()[name = tensor("op_2156_transpose_y_0"), val = tensor(false)]; tensor transpose_138_perm_0 = const()[name = tensor("transpose_138_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_139_perm_0 = const()[name = tensor("transpose_139_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_139 = transpose(perm = transpose_139_perm_0, x = k_21_cast_fp16)[name = tensor("transpose_158")]; tensor transpose_138 = transpose(perm = transpose_138_perm_0, x = q_85_cast_fp16)[name = tensor("transpose_159")]; tensor var_2156_cast_fp16 = matmul(transpose_x = var_2156_transpose_x_0, transpose_y = var_2156_transpose_y_0, x = transpose_138, y = transpose_139)[name = tensor("op_2156_cast_fp16")]; tensor var_2157_to_fp16 = const()[name = tensor("op_2157_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_107_cast_fp16 = mul(x = var_2156_cast_fp16, y = var_2157_to_fp16)[name = tensor("scores_107_cast_fp16")]; tensor scores_109_cast_fp16 = select(a = var_2046_to_fp16, b = scores_107_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_109_cast_fp16")]; tensor probs_43_cast_fp16 = softmax(axis = var_2051, x = scores_109_cast_fp16)[name = tensor("probs_43_cast_fp16")]; tensor var_2166_transpose_x_0 = const()[name = tensor("op_2166_transpose_x_0"), val = tensor(false)]; tensor var_2166_transpose_y_0 = const()[name = tensor("op_2166_transpose_y_0"), val = tensor(false)]; tensor vT_43_cast_fp16 = transpose(perm = var_2154, x = v_21_cast_fp16)[name = tensor("transpose_157")]; tensor var_2166_cast_fp16 = matmul(transpose_x = var_2166_transpose_x_0, transpose_y = var_2166_transpose_y_0, x = probs_43_cast_fp16, y = vT_43_cast_fp16)[name = tensor("op_2166_cast_fp16")]; tensor var_2167 = const()[name = tensor("op_2167"), val = tensor([0, 2, 1, 3])]; tensor var_2169 = const()[name = tensor("op_2169"), val = tensor([1, 111, -1])]; tensor var_2168_cast_fp16 = transpose(perm = var_2167, x = var_2166_cast_fp16)[name = tensor("transpose_156")]; tensor input_147_cast_fp16 = reshape(shape = var_2169, x = var_2168_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78160448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78258816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_54_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor input_149_cast_fp16 = add(x = input_145_cast_fp16, y = linear_54_cast_fp16)[name = tensor("input_149_cast_fp16")]; tensor x_167_axes_0 = const()[name = tensor("x_167_axes_0"), val = tensor([-1])]; tensor dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78260416)))]; tensor x_167_cast_fp16 = layer_norm(axes = x_167_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_149_cast_fp16)[name = tensor("x_167_cast_fp16")]; tensor var_2186 = const()[name = tensor("op_2186"), val = tensor([0, 2, 1])]; tensor y_63_pad_type_0 = const()[name = tensor("y_63_pad_type_0"), val = tensor("valid")]; tensor y_63_strides_0 = const()[name = tensor("y_63_strides_0"), val = tensor([1])]; tensor y_63_pad_0 = const()[name = tensor("y_63_pad_0"), val = tensor([0, 0])]; tensor y_63_dilations_0 = const()[name = tensor("y_63_dilations_0"), val = tensor([1])]; tensor y_63_groups_0 = const()[name = tensor("y_63_groups_0"), val = tensor(1)]; tensor dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78262016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80621376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_171_cast_fp16 = transpose(perm = var_2186, x = x_167_cast_fp16)[name = tensor("transpose_155")]; tensor y_63_cast_fp16 = conv(dilations = y_63_dilations_0, groups = y_63_groups_0, pad = y_63_pad_0, pad_type = y_63_pad_type_0, strides = y_63_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_171_cast_fp16)[name = tensor("y_63_cast_fp16")]; tensor x_173_mode_0 = const()[name = tensor("x_173_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = y_63_cast_fp16)[name = tensor("x_173_cast_fp16")]; tensor y_65_pad_type_0 = const()[name = tensor("y_65_pad_type_0"), val = tensor("valid")]; tensor y_65_strides_0 = const()[name = tensor("y_65_strides_0"), val = tensor([1])]; tensor y_65_pad_0 = const()[name = tensor("y_65_pad_0"), val = tensor([0, 0])]; tensor y_65_dilations_0 = const()[name = tensor("y_65_dilations_0"), val = tensor([1])]; tensor y_65_groups_0 = const()[name = tensor("y_65_groups_0"), val = tensor(1)]; tensor dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80627584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82986944))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_65_cast_fp16 = conv(dilations = y_65_dilations_0, groups = y_65_groups_0, pad = y_65_pad_0, pad_type = y_65_pad_type_0, strides = y_65_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_173_cast_fp16)[name = tensor("y_65_cast_fp16")]; tensor var_2206 = const()[name = tensor("op_2206"), val = tensor([0, 2, 1])]; tensor h_109_cast_fp16 = transpose(perm = var_2206, x = y_65_cast_fp16)[name = tensor("transpose_154")]; tensor x_177_cast_fp16 = add(x = input_149_cast_fp16, y = h_109_cast_fp16)[name = tensor("x_177_cast_fp16")]; tensor var_2236 = const()[name = tensor("op_2236"), val = tensor(-1)]; tensor input_153_axes_0 = const()[name = tensor("input_153_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_self_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_self_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82988544)))]; tensor var_2234_to_fp16 = const()[name = tensor("op_2234_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = x_177_cast_fp16)[name = tensor("input_153_cast_fp16")]; tensor dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82990144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84759680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2281088)))]; tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor var_2255 = const()[name = tensor("op_2255"), val = tensor([1, 111, 3, 12, 64])]; tensor qkv_cast_fp16 = reshape(shape = var_2255, x = linear_55_cast_fp16)[name = tensor("qkv_cast_fp16")]; tensor q_89_begin_0 = const()[name = tensor("q_89_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor q_89_end_0 = const()[name = tensor("q_89_end_0"), val = tensor([1, 111, 1, 12, 64])]; tensor q_89_end_mask_0 = const()[name = tensor("q_89_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor q_89_squeeze_mask_0 = const()[name = tensor("q_89_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor q_89_cast_fp16 = slice_by_index(begin = q_89_begin_0, end = q_89_end_0, end_mask = q_89_end_mask_0, squeeze_mask = q_89_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("q_89_cast_fp16")]; tensor new_k_begin_0 = const()[name = tensor("new_k_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor new_k_end_0 = const()[name = tensor("new_k_end_0"), val = tensor([1, 111, 2, 12, 64])]; tensor new_k_end_mask_0 = const()[name = tensor("new_k_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_k_squeeze_mask_0 = const()[name = tensor("new_k_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("new_k_cast_fp16")]; tensor new_k_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_k_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor new_v_begin_0 = const()[name = tensor("new_v_begin_0"), val = tensor([0, 0, 2, 0, 0])]; tensor new_v_end_0 = const()[name = tensor("new_v_end_0"), val = tensor([1, 111, 3, 12, 64])]; tensor new_v_end_mask_0 = const()[name = tensor("new_v_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor new_v_squeeze_mask_0 = const()[name = tensor("new_v_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor("new_v_cast_fp16")]; tensor new_v_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("new_v_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2270 = const()[name = tensor("op_2270"), val = tensor([0, 2, -3, -1])]; tensor var_2272_transpose_x_0 = const()[name = tensor("op_2272_transpose_x_0"), val = tensor(false)]; tensor var_2272_transpose_y_0 = const()[name = tensor("op_2272_transpose_y_0"), val = tensor(false)]; tensor transpose_140_perm_0 = const()[name = tensor("transpose_140_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_141_perm_0 = const()[name = tensor("transpose_141_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_141 = transpose(perm = transpose_141_perm_0, x = new_k_cast_fp16)[name = tensor("transpose_152")]; tensor transpose_140 = transpose(perm = transpose_140_perm_0, x = q_89_cast_fp16)[name = tensor("transpose_153")]; tensor var_2272_cast_fp16 = matmul(transpose_x = var_2272_transpose_x_0, transpose_y = var_2272_transpose_y_0, x = transpose_140, y = transpose_141)[name = tensor("op_2272_cast_fp16")]; tensor var_2273_to_fp16 = const()[name = tensor("op_2273_to_fp16"), val = tensor(0x1p-3)]; tensor scores_111_cast_fp16 = mul(x = var_2272_cast_fp16, y = var_2273_to_fp16)[name = tensor("scores_111_cast_fp16")]; tensor var_2231_to_fp16 = const()[name = tensor("op_2231_to_fp16"), val = tensor(-inf)]; tensor scores_113_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_111_cast_fp16)[name = tensor("scores_113_cast_fp16")]; tensor probs_45_cast_fp16 = softmax(axis = var_2236, x = scores_113_cast_fp16)[name = tensor("probs_45_cast_fp16")]; tensor var_2293_transpose_x_0 = const()[name = tensor("op_2293_transpose_x_0"), val = tensor(false)]; tensor var_2293_transpose_y_0 = const()[name = tensor("op_2293_transpose_y_0"), val = tensor(false)]; tensor vT_45_cast_fp16 = transpose(perm = var_2270, x = new_v_cast_fp16)[name = tensor("transpose_151")]; tensor var_2293_cast_fp16 = matmul(transpose_x = var_2293_transpose_x_0, transpose_y = var_2293_transpose_y_0, x = probs_45_cast_fp16, y = vT_45_cast_fp16)[name = tensor("op_2293_cast_fp16")]; tensor var_2294 = const()[name = tensor("op_2294"), val = tensor([0, 2, 1, 3])]; tensor var_2298 = const()[name = tensor("op_2298"), val = tensor([1, 111, -1])]; tensor y_67_cast_fp16 = transpose(perm = var_2294, x = var_2293_cast_fp16)[name = tensor("transpose_150")]; tensor input_155_cast_fp16 = reshape(shape = var_2298, x = y_67_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84764352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85354240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor input_157_cast_fp16 = add(x = x_177_cast_fp16, y = linear_56_cast_fp16)[name = tensor("input_157_cast_fp16")]; tensor x_181_axes_0 = const()[name = tensor("x_181_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85355840)))]; tensor x_181_cast_fp16 = layer_norm(axes = x_181_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_157_cast_fp16)[name = tensor("x_181_cast_fp16")]; tensor memory_axes_0 = const()[name = tensor("memory_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_xattn_memory_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_xattn_memory_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85357440)))]; tensor memory_cast_fp16 = layer_norm(axes = memory_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor("memory_cast_fp16")]; tensor dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85359040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85457408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3013056)))]; tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = x_181_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor var_2320 = const()[name = tensor("op_2320"), val = tensor([1, 111, 1, 128])]; tensor q_93_cast_fp16 = reshape(shape = var_2320, x = linear_57_cast_fp16)[name = tensor("q_93_cast_fp16")]; tensor dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85457728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85654400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3210560)))]; tensor linear_58_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor var_2326 = const()[name = tensor("op_2326"), val = tensor([1, 256, 2, 1, 128])]; tensor kv_cast_fp16 = reshape(shape = var_2326, x = linear_58_cast_fp16)[name = tensor("kv_cast_fp16")]; tensor k_begin_0 = const()[name = tensor("k_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor k_end_0 = const()[name = tensor("k_end_0"), val = tensor([1, 256, 1, 1, 128])]; tensor k_end_mask_0 = const()[name = tensor("k_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor k_squeeze_mask_0 = const()[name = tensor("k_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor k_cast_fp16 = slice_by_index(begin = k_begin_0, end = k_end_0, end_mask = k_end_mask_0, squeeze_mask = k_squeeze_mask_0, x = kv_cast_fp16)[name = tensor("k_cast_fp16")]; tensor k_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("k_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor v_begin_0 = const()[name = tensor("v_begin_0"), val = tensor([0, 0, 1, 0, 0])]; tensor v_end_0 = const()[name = tensor("v_end_0"), val = tensor([1, 256, 2, 1, 128])]; tensor v_end_mask_0 = const()[name = tensor("v_end_mask_0"), val = tensor([true, true, false, true, true])]; tensor v_squeeze_mask_0 = const()[name = tensor("v_squeeze_mask_0"), val = tensor([false, false, true, false, false])]; tensor v_cast_fp16 = slice_by_index(begin = v_begin_0, end = v_end_0, end_mask = v_end_mask_0, squeeze_mask = v_squeeze_mask_0, x = kv_cast_fp16)[name = tensor("v_cast_fp16")]; tensor v_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("v_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor var_2339 = const()[name = tensor("op_2339"), val = tensor([0, 2, -3, -1])]; tensor var_2341_transpose_x_0 = const()[name = tensor("op_2341_transpose_x_0"), val = tensor(false)]; tensor var_2341_transpose_y_0 = const()[name = tensor("op_2341_transpose_y_0"), val = tensor(false)]; tensor transpose_142_perm_0 = const()[name = tensor("transpose_142_perm_0"), val = tensor([0, 2, -3, -1])]; tensor transpose_143_perm_0 = const()[name = tensor("transpose_143_perm_0"), val = tensor([0, 2, -1, -3])]; tensor transpose_143 = transpose(perm = transpose_143_perm_0, x = k_cast_fp16)[name = tensor("transpose_148")]; tensor transpose_142 = transpose(perm = transpose_142_perm_0, x = q_93_cast_fp16)[name = tensor("transpose_149")]; tensor var_2341_cast_fp16 = matmul(transpose_x = var_2341_transpose_x_0, transpose_y = var_2341_transpose_y_0, x = transpose_142, y = transpose_143)[name = tensor("op_2341_cast_fp16")]; tensor var_2342_to_fp16 = const()[name = tensor("op_2342_to_fp16"), val = tensor(0x1.6ap-4)]; tensor scores_117_cast_fp16 = mul(x = var_2341_cast_fp16, y = var_2342_to_fp16)[name = tensor("scores_117_cast_fp16")]; tensor scores_cast_fp16 = select(a = var_2231_to_fp16, b = scores_117_cast_fp16, cond = var_313_cast_fp16)[name = tensor("scores_cast_fp16")]; tensor probs_cast_fp16 = softmax(axis = var_2236, x = scores_cast_fp16)[name = tensor("probs_cast_fp16")]; tensor var_2351_transpose_x_0 = const()[name = tensor("op_2351_transpose_x_0"), val = tensor(false)]; tensor var_2351_transpose_y_0 = const()[name = tensor("op_2351_transpose_y_0"), val = tensor(false)]; tensor vT_cast_fp16 = transpose(perm = var_2339, x = v_cast_fp16)[name = tensor("transpose_147")]; tensor var_2351_cast_fp16 = matmul(transpose_x = var_2351_transpose_x_0, transpose_y = var_2351_transpose_y_0, x = probs_cast_fp16, y = vT_cast_fp16)[name = tensor("op_2351_cast_fp16")]; tensor var_2352 = const()[name = tensor("op_2352"), val = tensor([0, 2, 1, 3])]; tensor var_2354 = const()[name = tensor("op_2354"), val = tensor([1, 111, -1])]; tensor var_2353_cast_fp16 = transpose(perm = var_2352, x = var_2351_cast_fp16)[name = tensor("transpose_146")]; tensor input_159_cast_fp16 = reshape(shape = var_2354, x = var_2353_cast_fp16)[name = tensor("input_159_cast_fp16")]; tensor dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85654976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85753344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor input_161_cast_fp16 = add(x = input_157_cast_fp16, y = linear_59_cast_fp16)[name = tensor("input_161_cast_fp16")]; tensor x_183_axes_0 = const()[name = tensor("x_183_axes_0"), val = tensor([-1])]; tensor dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = tensor("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85754944)))]; tensor x_183_cast_fp16 = layer_norm(axes = x_183_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("x_183_cast_fp16")]; tensor var_2371 = const()[name = tensor("op_2371"), val = tensor([0, 2, 1])]; tensor y_69_pad_type_0 = const()[name = tensor("y_69_pad_type_0"), val = tensor("valid")]; tensor y_69_strides_0 = const()[name = tensor("y_69_strides_0"), val = tensor([1])]; tensor y_69_pad_0 = const()[name = tensor("y_69_pad_0"), val = tensor([0, 0])]; tensor y_69_dilations_0 = const()[name = tensor("y_69_dilations_0"), val = tensor([1])]; tensor y_69_groups_0 = const()[name = tensor("y_69_groups_0"), val = tensor(1)]; tensor dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85756544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88115904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5672960)))]; tensor x_187_cast_fp16 = transpose(perm = var_2371, x = x_183_cast_fp16)[name = tensor("transpose_145")]; tensor y_69_cast_fp16 = conv(dilations = y_69_dilations_0, groups = y_69_groups_0, pad = y_69_pad_0, pad_type = y_69_pad_type_0, strides = y_69_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_187_cast_fp16)[name = tensor("y_69_cast_fp16")]; tensor x_189_mode_0 = const()[name = tensor("x_189_mode_0"), val = tensor("TANH_APPROXIMATION")]; tensor x_189_cast_fp16 = gelu(mode = x_189_mode_0, x = y_69_cast_fp16)[name = tensor("x_189_cast_fp16")]; tensor y_pad_type_0 = const()[name = tensor("y_pad_type_0"), val = tensor("valid")]; tensor y_strides_0 = const()[name = tensor("y_strides_0"), val = tensor([1])]; tensor y_pad_0 = const()[name = tensor("y_pad_0"), val = tensor([0, 0])]; tensor y_dilations_0 = const()[name = tensor("y_dilations_0"), val = tensor([1])]; tensor y_groups_0 = const()[name = tensor("y_groups_0"), val = tensor(1)]; tensor dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88122112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90481472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(2907456)))]; tensor y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_189_cast_fp16)[name = tensor("y_cast_fp16")]; tensor var_2391 = const()[name = tensor("op_2391"), val = tensor([0, 2, 1])]; tensor h_cast_fp16 = transpose(perm = var_2391, x = y_cast_fp16)[name = tensor("transpose_144")]; tensor x_193_cast_fp16 = add(x = input_161_cast_fp16, y = h_cast_fp16)[name = tensor("x_193_cast_fp16")]; tensor x_axes_0 = const()[name = tensor("x_axes_0"), val = tensor([-1])]; tensor dec_norm_out_weight_to_fp16 = const()[name = tensor("dec_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90483072)))]; tensor var_2405_to_fp16 = const()[name = tensor("op_2405_to_fp16"), val = tensor(0x1.5p-17)]; tensor x_cast_fp16 = layer_norm(axes = x_axes_0, epsilon = var_2405_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = x_193_cast_fp16)[name = tensor("x_cast_fp16")]; tensor var_2420_begin_0 = const()[name = tensor("op_2420_begin_0"), val = tensor([0, -1, 0])]; tensor var_2420_end_0 = const()[name = tensor("op_2420_end_0"), val = tensor([1, 111, 768])]; tensor var_2420_end_mask_0 = const()[name = tensor("op_2420_end_mask_0"), val = tensor([true, true, true])]; tensor var_2420_cast_fp16 = slice_by_index(begin = var_2420_begin_0, end = var_2420_end_0, end_mask = var_2420_end_mask_0, x = x_cast_fp16)[name = tensor("op_2420_cast_fp16")]; tensor var_2425_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_2425_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor sa_k_0 = cast(dtype = new_k_1_cast_fp16_to_fp32_dtype_0, x = new_k_1_cast_fp16)[name = tensor("cast_1")]; tensor sa_v_0 = cast(dtype = new_v_1_cast_fp16_to_fp32_dtype_0, x = new_v_1_cast_fp16)[name = tensor("cast_2")]; tensor xa_k_0 = cast(dtype = k_1_cast_fp16_to_fp32_dtype_0, x = k_1_cast_fp16)[name = tensor("cast_3")]; tensor xa_v_0 = cast(dtype = v_1_cast_fp16_to_fp32_dtype_0, x = v_1_cast_fp16)[name = tensor("cast_4")]; tensor sa_k_1 = cast(dtype = new_k_3_cast_fp16_to_fp32_dtype_0, x = new_k_3_cast_fp16)[name = tensor("cast_5")]; tensor sa_v_1 = cast(dtype = new_v_3_cast_fp16_to_fp32_dtype_0, x = new_v_3_cast_fp16)[name = tensor("cast_6")]; tensor xa_k_1 = cast(dtype = k_3_cast_fp16_to_fp32_dtype_0, x = k_3_cast_fp16)[name = tensor("cast_7")]; tensor xa_v_1 = cast(dtype = v_3_cast_fp16_to_fp32_dtype_0, x = v_3_cast_fp16)[name = tensor("cast_8")]; tensor sa_k_2 = cast(dtype = new_k_5_cast_fp16_to_fp32_dtype_0, x = new_k_5_cast_fp16)[name = tensor("cast_9")]; tensor sa_v_2 = cast(dtype = new_v_5_cast_fp16_to_fp32_dtype_0, x = new_v_5_cast_fp16)[name = tensor("cast_10")]; tensor xa_k_2 = cast(dtype = k_5_cast_fp16_to_fp32_dtype_0, x = k_5_cast_fp16)[name = tensor("cast_11")]; tensor xa_v_2 = cast(dtype = v_5_cast_fp16_to_fp32_dtype_0, x = v_5_cast_fp16)[name = tensor("cast_12")]; tensor sa_k_3 = cast(dtype = new_k_7_cast_fp16_to_fp32_dtype_0, x = new_k_7_cast_fp16)[name = tensor("cast_13")]; tensor sa_v_3 = cast(dtype = new_v_7_cast_fp16_to_fp32_dtype_0, x = new_v_7_cast_fp16)[name = tensor("cast_14")]; tensor xa_k_3 = cast(dtype = k_7_cast_fp16_to_fp32_dtype_0, x = k_7_cast_fp16)[name = tensor("cast_15")]; tensor xa_v_3 = cast(dtype = v_7_cast_fp16_to_fp32_dtype_0, x = v_7_cast_fp16)[name = tensor("cast_16")]; tensor sa_k_4 = cast(dtype = new_k_9_cast_fp16_to_fp32_dtype_0, x = new_k_9_cast_fp16)[name = tensor("cast_17")]; tensor sa_v_4 = cast(dtype = new_v_9_cast_fp16_to_fp32_dtype_0, x = new_v_9_cast_fp16)[name = tensor("cast_18")]; tensor xa_k_4 = cast(dtype = k_9_cast_fp16_to_fp32_dtype_0, x = k_9_cast_fp16)[name = tensor("cast_19")]; tensor xa_v_4 = cast(dtype = v_9_cast_fp16_to_fp32_dtype_0, x = v_9_cast_fp16)[name = tensor("cast_20")]; tensor sa_k_5 = cast(dtype = new_k_11_cast_fp16_to_fp32_dtype_0, x = new_k_11_cast_fp16)[name = tensor("cast_21")]; tensor sa_v_5 = cast(dtype = new_v_11_cast_fp16_to_fp32_dtype_0, x = new_v_11_cast_fp16)[name = tensor("cast_22")]; tensor xa_k_5 = cast(dtype = k_11_cast_fp16_to_fp32_dtype_0, x = k_11_cast_fp16)[name = tensor("cast_23")]; tensor xa_v_5 = cast(dtype = v_11_cast_fp16_to_fp32_dtype_0, x = v_11_cast_fp16)[name = tensor("cast_24")]; tensor sa_k_6 = cast(dtype = new_k_13_cast_fp16_to_fp32_dtype_0, x = new_k_13_cast_fp16)[name = tensor("cast_25")]; tensor sa_v_6 = cast(dtype = new_v_13_cast_fp16_to_fp32_dtype_0, x = new_v_13_cast_fp16)[name = tensor("cast_26")]; tensor xa_k_6 = cast(dtype = k_13_cast_fp16_to_fp32_dtype_0, x = k_13_cast_fp16)[name = tensor("cast_27")]; tensor xa_v_6 = cast(dtype = v_13_cast_fp16_to_fp32_dtype_0, x = v_13_cast_fp16)[name = tensor("cast_28")]; tensor sa_k_7 = cast(dtype = new_k_15_cast_fp16_to_fp32_dtype_0, x = new_k_15_cast_fp16)[name = tensor("cast_29")]; tensor sa_v_7 = cast(dtype = new_v_15_cast_fp16_to_fp32_dtype_0, x = new_v_15_cast_fp16)[name = tensor("cast_30")]; tensor xa_k_7 = cast(dtype = k_15_cast_fp16_to_fp32_dtype_0, x = k_15_cast_fp16)[name = tensor("cast_31")]; tensor xa_v_7 = cast(dtype = v_15_cast_fp16_to_fp32_dtype_0, x = v_15_cast_fp16)[name = tensor("cast_32")]; tensor sa_k_8 = cast(dtype = new_k_17_cast_fp16_to_fp32_dtype_0, x = new_k_17_cast_fp16)[name = tensor("cast_33")]; tensor sa_v_8 = cast(dtype = new_v_17_cast_fp16_to_fp32_dtype_0, x = new_v_17_cast_fp16)[name = tensor("cast_34")]; tensor xa_k_8 = cast(dtype = k_17_cast_fp16_to_fp32_dtype_0, x = k_17_cast_fp16)[name = tensor("cast_35")]; tensor xa_v_8 = cast(dtype = v_17_cast_fp16_to_fp32_dtype_0, x = v_17_cast_fp16)[name = tensor("cast_36")]; tensor sa_k_9 = cast(dtype = new_k_19_cast_fp16_to_fp32_dtype_0, x = new_k_19_cast_fp16)[name = tensor("cast_37")]; tensor sa_v_9 = cast(dtype = new_v_19_cast_fp16_to_fp32_dtype_0, x = new_v_19_cast_fp16)[name = tensor("cast_38")]; tensor xa_k_9 = cast(dtype = k_19_cast_fp16_to_fp32_dtype_0, x = k_19_cast_fp16)[name = tensor("cast_39")]; tensor xa_v_9 = cast(dtype = v_19_cast_fp16_to_fp32_dtype_0, x = v_19_cast_fp16)[name = tensor("cast_40")]; tensor sa_k_10 = cast(dtype = new_k_21_cast_fp16_to_fp32_dtype_0, x = new_k_21_cast_fp16)[name = tensor("cast_41")]; tensor sa_v_10 = cast(dtype = new_v_21_cast_fp16_to_fp32_dtype_0, x = new_v_21_cast_fp16)[name = tensor("cast_42")]; tensor xa_k_10 = cast(dtype = k_21_cast_fp16_to_fp32_dtype_0, x = k_21_cast_fp16)[name = tensor("cast_43")]; tensor xa_v_10 = cast(dtype = v_21_cast_fp16_to_fp32_dtype_0, x = v_21_cast_fp16)[name = tensor("cast_44")]; tensor sa_k_11 = cast(dtype = new_k_cast_fp16_to_fp32_dtype_0, x = new_k_cast_fp16)[name = tensor("cast_45")]; tensor sa_v_11 = cast(dtype = new_v_cast_fp16_to_fp32_dtype_0, x = new_v_cast_fp16)[name = tensor("cast_46")]; tensor xa_k_11 = cast(dtype = k_cast_fp16_to_fp32_dtype_0, x = k_cast_fp16)[name = tensor("cast_47")]; tensor xa_v_11 = cast(dtype = v_cast_fp16_to_fp32_dtype_0, x = v_cast_fp16)[name = tensor("cast_48")]; tensor h_last = cast(dtype = var_2425_cast_fp16_to_fp32_dtype_0, x = var_2420_cast_fp16)[name = tensor("cast_49")]; } -> (h_last, sa_k_0, sa_k_1, sa_k_2, sa_k_3, sa_k_4, sa_k_5, sa_k_6, sa_k_7, sa_k_8, sa_k_9, sa_k_10, sa_k_11, sa_v_0, sa_v_1, sa_v_2, sa_v_3, sa_v_4, sa_v_5, sa_v_6, sa_v_7, sa_v_8, sa_v_9, sa_v_10, sa_v_11, xa_k_0, xa_k_1, xa_k_2, xa_k_3, xa_k_4, xa_k_5, xa_k_6, xa_k_7, xa_k_8, xa_k_9, xa_k_10, xa_k_11, xa_v_0, xa_v_1, xa_v_2, xa_v_3, xa_v_4, xa_v_5, xa_v_6, xa_v_7, xa_v_8, xa_v_9, xa_v_10, xa_v_11); }